libname asie 'c:\sas files';
run;

/***************************************************************/
/** MAIN CODES (THE SUPPLEMENTAL CODES ARE IN A SEPERATE FILE)
/***************************************************************/

/***************************************************/
/** FIGURE 1 
/***************************************************/

/** distribution of number of mentions in the reports **/

data asie.comp_mention_stat1;
set asie.sec_competition_new; /** this is the connection file **/
byear=year(fiscalmonth);
bmonth=month(fiscalmonth);
if filingticker='GOOG' then filingticker='GOOGL';
if mentionedticker='GOOG' then mentionedticker='GOOGL';
if filingticker='UA' then filingticker='UAA';
if mentionedticker='UA' then mentionedticker='UAA';
if filingticker='CENT' then filingticker='CENTA';
if mentionedticker='CENT' then mentionedticker='CENTA';
keep filingticker mentionedticker byear bmonth;
if competitionsectionfound='True'; /** this condition is to get only reports with competition section **/
proc print;
run;

proc sort data=asie.comp_mention_stat1 nodupkey;
by filingticker mentionedticker byear bmonth;
run;

data asie.comp_mention_stat1a;
set asie.comp_mention_stat1;
if byear ge 1995 and byear le 2017;
run;

data asie.comp_mention_stat2;
set asie.comp_mention_stat1a;
if mentionedticker = '' then do;
mention=0;
end;
else do;
mention=1;
end;
proc print;
run;

proc sort data=asie.comp_mention_stat2;
by filingticker byear bmonth;
run;

proc means data=asie.comp_mention_stat2 noprint;
by filingticker byear bmonth;
var mention; 
output out=asie.comp_mention_stat2a sum=sum_mention;
proc print data=asie.comp_mention_stat2a;
run;

proc freq data=asie.comp_mention_stat2a;
tables sum_mention;
run;

data asie.aaa;
set asie.comp_mention_stat2a;
if sum_mention=0;
run;

proc sort data=asie.comp_mention_stat2a;
by descending sum_mention;
run;

proc print;
run;

/** distribution of number of times a firm is mentioned in other reports **/

data asie.simple_count1;
set asie.sec_competition_new;
byear=year(fiscalmonth);
bmonth=month(fiscalmonth);
if filingticker='GOOG' then filingticker='GOOGL';
if mentionedticker='GOOG' then mentionedticker='GOOGL';
if filingticker='UA' then filingticker='UAA';
if mentionedticker='UA' then mentionedticker='UAA';
if filingticker='CENT' then filingticker='CENTA';
if mentionedticker='CENT' then mentionedticker='CENTA';
keep filingticker mentionedticker byear bmonth;
if mentionedticker ne '';
keep filingticker mentionedticker byear bmonth;
proc print;
run;

proc sort data=asie.simple_count1 nodupkey;
by filingticker mentionedticker byear bmonth;
run;

data asie.simple_count2;
set asie.simple_count1;
if byear ge 1995 and byear le 2017;
run;

/** count only the mentioned firms **/

proc sort data=asie.simple_count2;
by mentionedticker byear bmonth;
run;

proc means data=asie.simple_count2 noprint;
by mentionedticker byear bmonth;
var byear; 
output out=asie.simple_count3 n=mention_count;
proc print data=asie.simple_count3;
run;

data asie.mentioned_firms1;
set asie.simple_count3;
keep mentionedticker;
run;

/** list of all mentioned firms **/

proc sort data=asie.mentioned_firms1 nodupkey;
by mentionedticker;
run;

/** add all months to all mentioned firms **/

proc print data=asie.months1; /** this is a monthly calendar file **/
run;

proc sql;
  create table asie.mentioned_firms2 as
  select a.*,b.*
    from asie.months1 as a, asie.mentioned_firms1 as b;
 run;

proc sort data=asie.mentioned_firms2;
by mentionedticker year month;
run;

/** merge the file with the mentiones to the file with all mentioned firms in all months **/

proc sql;
  create table asie.simple_count4 as
  select a.*,b.*
    from asie.mentioned_firms2 as a left join asie.simple_count3 as b
    on a.mentionedticker=b.mentionedticker 
    and a.year=b.byear
	and a.month=b.bmonth;
 run;

data asie.simple_count5;
set asie.simple_count4;
if mention_count=. then mention_count=0;
run;

/** sum all mentions in the last 12 months **/

proc sort data=asie.simple_count5;
by mentionedticker year month;
run;

data asie.simple_count6;
set asie.simple_count5;
l11mt=lag11(mentionedticker);
lag1mc=lag(mention_count);
lag2mc=lag2(mention_count);
lag3mc=lag3(mention_count);
lag4mc=lag4(mention_count);
lag5mc=lag5(mention_count);
lag6mc=lag6(mention_count);
lag7mc=lag7(mention_count);
lag8mc=lag8(mention_count);
lag9mc=lag9(mention_count);
lag10mc=lag10(mention_count);
lag11mc=lag11(mention_count);
if l11mt=mentionedticker then do;
simple_count=mention_count+lag1mc+lag2mc+lag3mc+lag4mc+lag5mc+lag6mc+lag7mc+lag8mc+lag9mc+lag10mc+lag11mc;
end;
drop byear bmonth _TYPE_ _FREQ_ l11mt lag1mc lag2mc lag3mc lag4mc lag5mc lag6mc lag7mc lag8mc lag9mc lag10mc lag11mc;
run;

proc print data=asie.simple_count6; 
run;

proc freq data=asie.simple_count6;
tables simple_count; 
run;

/** calendar months frequency **/

data asie.simple_count6_annual;
set asie.simple_count6;
if month=12;
run;

proc freq data=asie.simple_count6_annual;
tables simple_count;
run;

proc sort data=asie.simple_count6_annual;
by descending simple_count;
run;

proc print;
run;

/** joint distribution **/

proc means data=asie.comp_mention_stat2 noprint;
by filingticker byear bmonth;
var mention; 
output out=asie.comp_mention_stat2a sum=sum_mention;
proc print data=asie.comp_mention_stat2a;
run;

data asie.simple_count6_annual;
set asie.simple_count6;
if month=12;
proc print;
run;

proc sql;
  create table asie.joint_dist1 as
  select a.*,b.*
    from asie.comp_mention_stat2a as a left join asie.simple_count6_annual as b
    on a.filingticker=b.mentionedticker 
    and a.byear=b.year;
run;

data asie.joint_dist2;
set asie.joint_dist1;
if sum_mention = . then sum_mention=0;
if simple_count = . then simple_count=0;
run;

data asie.joint_dist3;
set asie.joint_dist2;
if sum_mention > 10 then sum_mention=10;
if simple_count > 10 then simple_count=10;
run;

proc freq data=asie.joint_dist3;
tables sum_mention*simple_count;
run;


/***************************************************/
/** FIGURE 2 
/***************************************************/

/** percentage of no mention firms **/

/** all firms **/

proc sort data=asie.no_mention_all; /** this is a file with no mention indicator with 3 month lag - all firms **/
by fyear fmonth;
run;

proc means data=asie.bbb noprint; 
by fyear fmonth;
var no_mention ; 
output out=asie.ccc mean=P_no_mention;
proc print;
run;

/** excluding microcap stocks **/

proc sort data=asie.no_mention_excl_microcap; /** this is a file with no mention indicator with 3 month lag - excluding microcap stocks **/
by fyear fmonth;
run;

proc means data=asie.page_rank_values_large noprint; 
by fyear fmonth;
var no_mention ; 
output out=asie.ddd mean=P_no_mention_exl;
proc print;
run;

/** merge the files **/

proc sql;
  create table asie.eee as
  select a.*,b.*
    from asie.ccc as a, asie.ddd as b
    where a.fyear=b.fyear
    and a.fmonth=b.fmonth;
 run;

ods excel file='/sas files/myfile.xlsx';
proc print data=asie.eee;
run;
ods excel close;


/***************************************************/
/** TABLE 1 
/***************************************************/

/** generate lags in simple counts **/

proc sort data=asie.simple_count6;
by mentionedticker year month;
run;

data asie.simple_count7; 
set asie.simple_count6;
 lt=lag3(mentionedticker);
 lag3simple_count=lag3(simple_count);
 if lt=mentionedticker then do;
 l3simple_count=lag3simple_count;
end;
drop lag3simple_count;
run;

data asie.simple_count7a;
set asie.simple_count7; 
keep mentionedticker year month l3simple_count;
run;

/** merge with firm characteristics **/

proc sql;
  create table asie.simple_count7b as
  select a.*,b.*
    from asie.firm_characteristics as a left join asie.simple_count7a as b /** firm characteristics are from CRSP and Compustat **/
    on a.tic=b.mentionedticker
    and a.year=b.year 
    and a.month=b.month;
 run;

data asie.simple_count7c;
set asie.simple_count7b;
if mentionedticker = '' then l3simple_count=0;
run;

/** remove microcap cap stocks **/

proc univariate data=asie.simple_count7c;
var l3simple_count;
run;

proc sql;
  create table asie.simple_count7f as
  select a.*,b.*
    from asie.simple_count7c as a left join asie.size_breakpoints_new as b /** size breakpoints are from Ken French's website **/
    on a.year=b.year 
    and a.month=b.month;
 run;

data asie.simple_count7g;
set asie.simple_count7f;
if l_size ne .;
l_size_x=l_size;
drop gr_decile;
run;

proc sort data=asie.simple_count7g;
by year month l_size;
run;

data asie.simple_count7h;
set asie.simple_count7g;
if l_size/1000 le p10 then do;
r_var=0;
end;
else do;
if l_size/1000 le p20 then do;
r_var=1;
end;
else do;
if l_size/1000 le p30 then do;
r_var=2;
end;
else do;
if l_size/1000 le p40 then do;
r_var=3;
end;
else do;
if l_size/1000 le p50 then do;
r_var=4;
end;
else do;
if l_size/1000 le p60 then do;
r_var=5;
end;
else do;
if l_size/1000 le p70 then do;
r_var=6;
end;
else do;
if l_size/1000 le p80 then do;
r_var=7;
end;
else do;
if l_size/1000 le p90 then do;
r_var=8;
end;
else do;
r_var=9;
end;
end;
end;
end;
end;
end;
end;
end;
end;
run;

proc freq data=asie.simple_count7h;
tables r_var;
run;

data asie.simple_count7i;
set asie.simple_count7h;
if abs(prc) ge 5;
if r_var ge 2;
if mentionedticker = '' then l3simple_count=0;
run;

proc univariate data=asie.simple_count7i; 
var ex_ret l3simple_count l_Size Mtbq l_Past6_ret Profitability3 Investment3 l_Beta l_Idiovol;
run;


*----------------------winsorizing outliers-----------------------------------------------------;
%macro outliers();

%let vars=  l_Size Mtbq l_Past6_ret Profitability3 Investment3 l_Beta l_Idiovol;

%let cvar=%scan(&vars,1);
%let pos=1;

%do %while (&cvar ne );

%put &cvar;

proc sort data=asie.simple_count7i; by year month; run;

proc means data=asie.simple_count7i min p1 p99 max median mean n noprint;
by year month;
var &cvar;
output out = asie.outliers 
p1 (&cvar) = p1_&cvar
p99 (&cvar) = p99_&cvar;
run;

proc sql;
create table asie.temp as select a.*, b.p1_&cvar, b.p99_&cvar
from asie.simple_count7i as a left join asie.outliers as b
on a.year=b.year and a.month=b.month;
quit;

data asie.simple_count7i; set asie.temp;
if &cvar<p1_&cvar and &cvar ne . and p1_&cvar ne . then &cvar=p1_&cvar;
if &cvar>p99_&cvar and &cvar ne . and p99_&cvar ne . then &cvar=p99_&cvar;
drop p1_&cvar p99_&cvar;
run;

%let pos=%eval(&pos+1);
%let cvar=%scan(&vars,&pos);

%end;

%mend outliers;


%outliers;

proc sort data=asie.simple_count7i; by ticker year month; run;

data asie.simple_count7j; set asie.simple_count7i; run;


data asie.simple_count7ja;
set asie.simple_count7j;
/**if r_var le 4;
if r_var ge 5;**/ 
run;

data asie.simple_count7jb;
set asie.simple_count7ja; 
/**if l3simple_count > 0;**/ /** this to exclude firms that are not mentioned at all **/
ex_ret100=ex_ret*100;
log_size=log(l_Size);
if Mtbq le 0 and Mtbq ne . then Mtbq=0.01;
if Mtbq ge 30 and Mtbq ne . then Mtbq=30;
log_Mtbq=log(Mtbq);
if l3simple_count ne .;
if l3simple_count = 0 then do;
no_mention=1;
end;
else do;
no_mention=0;
end;
run;


/** simple count distribution**/

data asie.ds_pagerank1;
set asie.simple_count7jb; 
log_sc=log(1+l3simple_count);
run;

proc univariate data=asie.ds_pagerank1;
var l3simple_count log_sc;
run;

/** descriptive statistics **/

data asie.ds_pagerank2;
set asie.ds_pagerank1;
if l3simple_count > 3  then l3simple_count=3;
run;

proc freq data=asie.ds_pagerank2;
tables l3simple_count;
run;

proc sort data=asie.ds_pagerank2;
by l3simple_count year month; 
run;

proc means data=asie.ds_pagerank2 noprint;
by l3simple_count year month;
var l_Size Mtbq l_Past6_ret Profitability3 Investment3 l_Beta l_Idiovol;
output out=asie.ds_pagerank3 mean=l_Size Mtbq l_Past6_ret Profitability3 Investment3 l_Beta l_Idiovol;
proc print data=asie.ds_pagerank3;
run;

proc sort data=asie.ds_pagerank3;
by l3simple_count year month; 
run;

proc means data=asie.ds_pagerank3 noprint;
by l3simple_count;
var l_Size Mtbq l_Past6_ret Profitability3 Investment3 l_Beta l_Idiovol;
output out=asie.ds_pagerank4 mean=l_Size Mtbq l_Past6_ret Profitability3 Investment3 l_Beta l_Idiovol;
proc print data=asie.ds_pagerank4;
run;

ods excel file='/sas files/myfile.xlsx';
proc print data=asie.ds_pagerank4;
run;
ods excel close;

/**proc univariate data=asie.simple_count6_annual;
var simple_count;
run;**/

/** correlations by month **/

data asie.corr_year1;
set asie.ds_pagerank1;
run;

proc sort data=asie.corr_year1;
by year month;
run;

proc corr data=asie.corr_year1 noprint out=asie.corr1;
by year month;
var log_sc log_size; /** change to log_size log_Mtbq l_Past6_ret Profitability3 Investment3 l_Beta l_Idiovol **/
run;

data asie.corr2;
set asie.corr1;
corr=log_sc;
if mod(_n_,5)=0;
drop _TYPE_ _NAME_;
run;

proc means data=asie.corr2 t noprint;
var corr; 
output out=asie.mean_corr mean=m_corr;
proc print data=asie.mean_corr;
run;

data asie.coor3;
set asie.corr2;
corr2=corr2*1;
run;

proc ttest;
var corr;
run;


/***************************************************/
/** TABLE 2 
/***************************************************/

/** competition measures **/

/** 1. Product market fluidity of Hoberg, Phillips, and Prabhala (2014). Source: https://hobergphillips.tuck.dartmouth.edu/industryconcen.htm **/

/** 2. Text-based competitiveness by Li, Lundholm, and Minnis (2013). Source: https://webuser.bus.umich.edu/feng/ **/

/** 3. Industry operating leverage is the  average ratio of the sum of operating costs and administrative expenses to total assets, by GIC and ff48. 
       The files are 'GIC_OL1' and 'ff48_OL1' from the code 'Code_Industry_Concentraion_OL' **/

/** 4. Industry concentration is the Herfindahl-Hirshman index for market share of sales in the industry, by GIC and ff48. 
       The files are 'GIC_HH' and 'ff48_HH' from the code 'Code_Industry_Concentraion_OL'

/** 5. Product similarity score of Hoberg, Phillips, and Prabhala (2014). Source: https://hobergphillips.tuck.dartmouth.edu/industryconcen.htm.
       Computed by the sum of a fimr's score with its peers, by the code 'Code_HP_SIM' **/


/** 1. Product market fluidity **/

proc import datafile='c:\sas files\FluidityData.txt' out=asie.FluidityData dbms=tab replace; 
   getnames=yes;
run;
proc print data=asie.FluidityData;
run;

data asie.permno_cusip_gvkey_tic_sic_2;
set asie.permno_cusip_gvkey_tic_sic; /** this file contains firm permno, cusip, gvkey, ticker, and sic from CRSP/Compustat **/
year_p=year(date);
month_p=month(date);
if year_p ge 1994 and year_p le 2017;
gvkey1=gvkey*1;
run;

proc sort data=asie.permno_cusip_gvkey_tic_sic_2;
by permno year_p month_p;
run;

data asie.FluidityData2;
set asie.FluidityData;
gvkey1=gvkey*1;
run;

proc sql;
  create table asie.FluidityData3 as
  select a.*,b.*
    from asie.FluidityData2 as a left join asie.permno_cusip_gvkey_tic_sic_2 as b
    on a.gvkey1=b.gvkey1
	and a.year=b.year_p;
 run; 

proc sort data=asie.FluidityData3 nodupkey;
by gvkey1 year;
run;

data asie.FluidityData4;
set asie.FluidityData3;
lp=lag(permno);
lagprodmktfluid=lag(prodmktfluid);
if lp=permno then do;
lprodmktfluid=lagprodmktfluid;
end;
proc print;
run;

/** merge with the simple count file **/

data asie.simple_count7jb_2;
set asie.simple_count7jb; 
if mentionedticker ne '';
run;

proc sql;
  create table asie.simple_count7jb_3 as
  select a.*,b.*
    from asie.simple_count7jb_2 as a left join asie.FluidityData4 as b
    on a.mentionedticker=b.tic
	and a.year=b.year;
 run; 

proc sort data=asie.simple_count7jb_3 nodupkey;
by mentionedticker year month;
run;


/** 2. Text-based competitiveness **/

proc print data=asie.LLM_Competition_data_Mat_2012;
run;

data asie.LLM_Competition_2;
set asie.LLM_Competition_data_Mat_2012;
gvkey1=gvkey*1;
run;

proc sql;
  create table asie.LLM_Competition_3 as
  select a.*,b.*
    from asie.LLM_Competition_2 as a left join asie.permno_cusip_gvkey_tic_sic_2 as b
    on a.gvkey1=b.gvkey1
	and a.year=b.year_p;
 run; 

proc sort data=asie.LLM_Competition_3 nodupkey;
by gvkey1 year;
run;

data asie.LLM_Competition_4;
set asie.LLM_Competition_3;
lp=lag(permno);
lagpctcomp=lag(pctcomp);
if lp=permno then do;
lpctcomp=lagpctcomp;
end;
proc print;
run;


/** merge with the simple count file **/

proc sql;
  create table asie.simple_count7jb_4 as
  select a.*,b.*
    from asie.simple_count7jb_3 as a left join asie.LLM_Competition_4 as b
    on a.mentionedticker=b.tic
	and a.year=b.year;
 run; 

proc sort data=asie.simple_count7jb_4 nodupkey;
by mentionedticker year month;
run;


/** 3. Industry operating leverage **/

/** add GIC **/

data asie.firm_gic_2;
set asie.firm_gic; /** this is a file with firm GICs **/
if gic ne .;
run;

proc sort data=asie.firm_gic_2 nodupkey;
by ticker;
run;

proc sql;
  create table asie.simple_count7jb_5 as
  select a.*,b.*
    from asie.simple_count7jb_4 as a left join asie.firm_gic_2 as b
    on a.mentionedticker=b.ticker;
 run;

/** add ff48 **/

data asie.ff48_2;
set asie.operating_leverage2;
if ff48 ne .;
keep tic year month ff48;
run;

proc sort data=asie.ff48_2 nodupkey;
by tic;
run;

proc sql;
  create table asie.simple_count7jb_6 as
  select a.*,b.*
    from asie.simple_count7jb_5 as a left join asie.ff48_2 as b
    on a.mentionedticker=b.tic;
 run;

/** add GIC OL **/

proc sql;
  create table asie.simple_count7jb_7 as
  select a.*,b.*
    from asie.simple_count7jb_6 as a left join asie.GIC_OL1 as b
    on a.GIC=b.GIC
    and a.year=b.year
    and a.month=b.month;
 run;

/** add ff48 OL **/

proc sql;
  create table asie.simple_count7jb_8 as
  select a.*,b.*
    from asie.simple_count7jb_7 as a left join asie.ff48_OL1 as b
    on a.ff48=b.ff48
    and a.year=b.year
    and a.month=b.month;
 run;


/** 4. Industry concentration **/

/** add GIC HH **/

proc sql;
  create table asie.simple_count7jb_9 as
  select a.*,b.*
    from asie.simple_count7jb_8 as a left join asie.GIC_HH as b
    on a.GIC=b.GIC
    and a.year=b.year
    and a.month=b.month;
 run;

/** add ff48 HH **/

proc sql;
  create table asie.simple_count7jb_10 as
  select a.*,b.*
    from asie.simple_count7jb_9 as a left join asie.ff48_HH as b
    on a.ff48=b.ff48
    and a.year=b.year
    and a.month=b.month;
 run;

/** 5. Product similarity score **/

proc sql;
  create table asie.simple_count7jb_11 as
  select a.*,b.*
    from asie.simple_count7jb_10 as a left join asie.sim3 as b
    on a.mentionedticker=b.tic
	and a.year=b.year;
 run; 


*----------------------winsorizing outliers-----------------------------------------------------;
%macro outliers();

%let vars=  pctcomp sim prodmktfluid;

%let cvar=%scan(&vars,1);
%let pos=1;

%do %while (&cvar ne );

%put &cvar;

proc sort data=asie.simple_count7jb_11; by year month; run;

proc means data=asie.simple_count7jb_11 min p1 p99 max median mean n noprint;
by year month;
var &cvar;
output out = asie.outliers 
p1 (&cvar) = p1_&cvar
p99 (&cvar) = p99_&cvar;
run;

proc sql;
create table asie.temp as select a.*, b.p1_&cvar, b.p99_&cvar
from asie.simple_count7jb_11 as a left join asie.outliers as b
on a.year=b.year and a.month=b.month;
quit;

data asie.simple_count7jb_11; set asie.temp;
if &cvar<p1_&cvar and &cvar ne . and p1_&cvar ne . then &cvar=p1_&cvar;
if &cvar>p99_&cvar and &cvar ne . and p99_&cvar ne . then &cvar=p99_&cvar;
drop p1_&cvar p99_&cvar;
run;

%let pos=%eval(&pos+1);
%let cvar=%scan(&vars,&pos);

%end;

%mend outliers;


%outliers;

proc sort data=asie.simple_count7jb_11; by permno year month; run;

data asie.simple_count7jb_12; set asie.simple_count7jb_11; run;


/** descriptive statistics **/

data asie.ds_pagerank1;
set asie.simple_count7jb_12; 
log_sc=log(1+l3simple_count);
run;

data asie.ds_pagerank2;
set asie.ds_pagerank1;
if l3simple_count > 3  then l3simple_count=3;
run;

proc freq data=asie.ds_pagerank2;
tables l3simple_count;
run;

proc sort data=asie.ds_pagerank2;
by l3simple_count year month; 
run;

proc means data=asie.ds_pagerank2 noprint;
by l3simple_count year month;
var pctcomp sim prodmktfluid ind_GIC_OL1 ind_ff48_OL1 HH_GIC_SALE HH_ff48_SALE;
output out=asie.ds_pagerank3 mean= pctcomp sim prodmktfluid ind_GIC_OL1 ind_ff48_OL1 HH_GIC_SALE HH_ff48_SALE;
proc print data=asie.ds_pagerank3;
run;

proc sort data=asie.ds_pagerank3;
by l3simple_count year month; 
run;

proc means data=asie.ds_pagerank3 noprint;
by l3simple_count;
var pctcomp sim prodmktfluid ind_GIC_OL1 ind_ff48_OL1 HH_GIC_SALE HH_ff48_SALE;
output out=asie.ds_pagerank4 mean= pctcomp sim prodmktfluid ind_GIC_OL1 ind_ff48_OL1 HH_GIC_SALE HH_ff48_SALE;
proc print data=asie.ds_pagerank4;
run;

ods excel file='/sas files/myfile.xlsx';
proc print data=asie.ds_pagerank4;
run;
ods excel close;


/** difference in means: 3+ minus 0 mentions **/

data asie.ds_pagerank3_a;
set asie.ds_pagerank3;
if l3simple_count=1 then delete;
if l3simple_count=2 then delete;
run;

proc ttest data=asie.ds_pagerank3_a;
class l3simple_count;
var pctcomp sim prodmktfluid ind_GIC_OL1 ind_ff48_OL1 HH_GIC_SALE HH_ff48_SALE;
run;


/** correlations by month **/

data asie.corr_year1;
set asie.ds_pagerank1;
run;

proc sort data=asie.corr_year1;
by year month;
run;

proc corr data=asie.corr_year1 noprint out=asie.corr1;
by year month;
var log_sc pctcomp; /** change to pctcomp sim prodmktfluid ind_GIC_OL1 ind_ff48_OL1 HH_GIC_SALE HH_ff48_SALE **/
run;

data asie.corr2;
set asie.corr1;
corr=log_sc;
if mod(_n_,5)=0;
drop _TYPE_ _NAME_;
run;

proc means data=asie.corr2 t noprint;
var corr; 
output out=asie.mean_corr mean=m_corr;
proc print data=asie.mean_corr;
run;

data asie.coor3;
set asie.corr2;
corr2=corr2*1;
run;

proc ttest;
var corr;
run;


/***************************************************/
/** TABLE 3 / FIGURE 4
/***************************************************/

data asie.simple_count7a;
set asie.simple_count7; 
keep mentionedticker year month l3simple_count;
run;

proc sql;
  create table asie.simple_count7b as
  select a.*,b.*
    from asie.firm_characteristics as a left join asie.simple_count7a as b
    on a.tic=b.mentionedticker
    and a.year=b.year 
    and a.month=b.month;
 run;

data asie.simple_count7c;
set asie.simple_count7b;
if mentionedticker = '' then l3simple_count=0;
run;

/** remove microcap cap stocsk **/

proc univariate data=asie.simple_count7c;
var l3simple_count;
run;

proc sql;
  create table asie.simple_count7f as
  select a.*,b.*
    from asie.simple_count7c as a left join asie.size_breakpoints_new as b 
    on a.year=b.year 
    and a.month=b.month;
 run;

data asie.simple_count7g;
set asie.simple_count7f;
if l_size ne .;
l_size_x=l_size;
drop gr_decile;
run;

proc sort data=asie.simple_count7g;
by year month l_size;
run;

data asie.simple_count7h;
set asie.simple_count7g;
if l_size/1000 le p10 then do;
r_var=0;
end;
else do;
if l_size/1000 le p20 then do;
r_var=1;
end;
else do;
if l_size/1000 le p30 then do;
r_var=2;
end;
else do;
if l_size/1000 le p40 then do;
r_var=3;
end;
else do;
if l_size/1000 le p50 then do;
r_var=4;
end;
else do;
if l_size/1000 le p60 then do;
r_var=5;
end;
else do;
if l_size/1000 le p70 then do;
r_var=6;
end;
else do;
if l_size/1000 le p80 then do;
r_var=7;
end;
else do;
if l_size/1000 le p90 then do;
r_var=8;
end;
else do;
r_var=9;
end;
end;
end;
end;
end;
end;
end;
end;
end;
run;

proc sort data=asie.simple_count7h;
by r_var year month;
run;

proc means data=asie.simple_count7h noprint;
by r_var year month;
var ex_ret/weight=l_size_x; 
output out=asie.simple_count7h_mean mean=size_exret;
proc print;
run;

proc sql;
  create table asie.simple_count7h_a as
  select a.*,b.*
    from asie.simple_count7h as a left join asie.simple_count7h_mean as b 
    on a.r_var=b.r_var
    and a.year=b.year 
    and a.month=b.month;
 run;

data asie.simple_count7h_b;
set asie.simple_count7h_a;
ex_ret_size=ex_ret-size_exret;
run;

/**proc rank groups=10 data=asie.qf6_full_b out=asie.qf6_full_c;
var l_size; 
ranks r_var;
by year month;
run;**/

proc freq data=asie.simple_count7h_b;
tables r_var;
run;

data asie.simple_count7i;
set asie.simple_count7h_b;
if abs(prc) ge 5;
if r_var ge 2;
run;


/***************************/
/** Single sort **/
/***************************/

proc univariate data=asie.simple_count7i;
var l3simple_count;
run;

proc freq data=asie.simple_count7i;
tables l3simple_count;
run;

data asie.simple_count7j;
set asie.simple_count7i;
alt_var=l3simple_count; 
if alt_var ne .;
run;

/** sorting mentions into 0,1,2,and 3+ groups **/

data asie.qf6_full_no_mention;
set asie.simple_count7j;
if l3simple_count=0;
gr_decile=-1;
run;

data asie.qf6_full_mention;
set asie.simple_count7j;
if l3simple_count>0;
if l_size ne .;
run;

proc freq data=asie.qf6_full_mention;
tables l3simple_count;
run;

data asie.qf6_full_mention_a;
set asie.qf6_full_mention;
if l3simple_count=1 then do;
gr_decile=0;
end;
else do;
if l3simple_count=2 then do;
gr_decile=1;
end;
else do;
gr_decile=2;
end;
end;
run;

proc freq data=asie.qf6_full_mention_a;
tables gr_decile;
run;

/**proc sort data=asie.qf6_full_mention;
by year month alt_var;
run;

proc rank groups=3 data=asie.qf6_full_mention out=asie.qf6_full_mention_a;
var alt_var; 
ranks gr_decile;
by year month;
run;**/

data asie.qf6_full_d;
set asie.qf6_full_no_mention asie.qf6_full_mention_a;
run;

/** Calculating average excess returns **/

data asie.qf7a;
set asie.qf6_full_d;
/**if l3simple_count > 0;**/ /** use this condition for only mentioned firms **/
run;

proc sort data=asie.qf7a;
by year month gr_decile;
run;

proc means data=asie.qf7a noprint;
by year month gr_decile;
var ex_ret/weight=l_size_x; /** change ex_ret to ex_ret_size for size-decile adjusted return **/
output out=asie.qf8 mean=mean_exret;
run;

/** Adding the factors **/

data asie.qf8a;
set asie.qf8;
if _freq_ ge 5;
run;

proc sql;
  create table asie.qf9 as
  select a.*,b.*
    from asie.qf8a as a, asie.factorsmonthly2017 as b /** the factors are from Ken French's website **/
    where a.year=b.year
    and a.month=b.month;
 run;

data asie.qf9a;
set asie.qf9;
if year ge 1994;
run;

proc sort data=asie.qf9a;
by year month gr_decile;
run;

data asie.qf9b;
set asie.qf9a;
by year month gr_decile;
if first.month then do x=0;
end;
x+1;
run;

proc sort data=asie.qf9b;
by descending year descending month descending x;
run;

data asie.qf9c;
set asie.qf9b;
by descending year descending month descending x;
if first.month then do y=x;
end;
y+0;
run;

proc sort data=asie.qf9c;
by year month gr_decile;
run;

data asie.qf10;
set asie.qf9c;
if y=4;
run;

proc sort data=asie.qf10; 
by gr_decile year month;
run;

proc means data=asie.qf10 noprint; /** this is the average portfolio size **/
by gr_decile;
var _freq_; 
output out=asie.qf10_port mean=firm_port;
proc print;
run;

proc sort data=asie.qf10; 
by gr_decile year month;
run;

proc reg data=asie.qf10;
by gr_decile;
model mean_exret=;
run;

/**proc reg data=asie.qf10;
by gr_decile;
model mean_exret=emkt;
run;

proc reg data=asie.qf10;
by gr_decile;
model mean_exret=emkt smb hml;
run;

proc reg data=asie.qf10;
by gr_decile;
model mean_exret=emkt smb hml mom;
run;

proc reg data=asie.qf10;
by gr_decile;
model mean_exret=emkt smb hml rmw cma;
run;**/

proc reg data=asie.qf10;
by gr_decile;
model mean_exret=emkt smb hml rmw cma mom;
run;


/** Mean return for the difference **/

proc sort data=asie.qf10;
by year month gr_decile;
run;

/** high mention vs no mention **/

data asie.qf11; 
set asie.qf10;
lag3_grd=lag3(gr_decile);
lag3_meanf=lag3(mean_exret);
lag3year=lag3(year);
if gr_decile=2 and lag3_grd=-1 and lag3year=year then
p_ret=mean_exret-lag3_meanf;
if gr_decile=2; 
run;

/** high mention vs low mention **/

data asie.qf11; 
set asie.qf10;
lag2_grd=lag2(gr_decile);
lag2_meanf=lag2(mean_exret);
lag2year=lag2(year);
if gr_decile=2 and lag2_grd=0 and lag2year=year then
p_ret=mean_exret-lag2_meanf;
if gr_decile=2; 
run;

proc sort data=asie.qf11;
by year month;
run;

proc reg data=asie.qf11;
model p_ret=;
run;

/**proc reg data=asie.qf11;
model p_ret=emkt;
run;

proc reg data=asie.qf11;
model p_ret=emkt smb hml;
run;

proc reg data=asie.qf11;
model p_ret=emkt smb hml mom;
run;

proc reg data=asie.qf11;
model p_ret=emkt smb hml rmw cma;
run;**/

proc reg data=asie.qf11;
model p_ret=emkt smb hml rmw cma mom;
run;


/** Time series returns and alphas **/

proc sort data=asie.qf11; 
by year month;
run;

data asie.pr1; /** this is for high minus no **/
set asie.qf11; 
FF6_alpha=p_ret-(0.00775*emkt-0.36988*smb-0.18419*hml-0.23361*RMW-0.11147*CMA-0.00721*MOM);
keep year month emkt p_ret FF6_alpha;
proc print data=asie.pr1;
run;

data asie.pr1; /** this is for high minus low **/
set asie.qf11; 
FF6_alpha=p_ret-(-0.07564*emkt-0.32075*smb-0.13069*hml-0.14799*RMW-0.22460*CMA+0.01907*MOM);
keep year month emkt p_ret FF6_alpha;
proc print data=asie.pr1;
run;

ods excel file='/sas files/aaa.xlsx';
proc print data=asie.pr1;
run;
ods excel close;


**********************************************************************************************************
* Double sorted portfolios 
**********************************************************************************************************;

data asie.qf5;
set asie.simple_count7h;
if abs(prc) ge 5;
if r_var ge 2;
an_var=l_size_x; /** change l_Size Mtbq l_Past6_ret Profitability3 Investment3 l_Beta l_Idiovol**/
if an_var ne .;
if l3simple_count ne .;
keep year month ex_ret an_var l3simple_count l_size_x l_size;
run;

proc sort data=asie.qf5; 
by year month an_var; 
run;

proc rank groups=3 data=asie.qf5 out=asie.qf6;
var an_var; 
ranks r_var;
by year month;
run;

data asie.qf6_full_no_mention;
set asie.qf6;
if l3simple_count=0;
gr_decile=-1;
run;

data asie.qf6_full_mention;
set asie.qf6;
if l3simple_count>0;
if l_size ne .;
run;

proc freq data=asie.qf6_full_mention;
tables l3simple_count;
run;

data asie.qf6_full_mention_a;
set asie.qf6_full_mention;
if l3simple_count=1 then do;
gr_decile=0;
end;
else do;
if l3simple_count=2 then do;
gr_decile=1;
end;
else do;
gr_decile=2;
end;
end;
run;

proc freq data=asie.qf6_full_mention_a;
tables gr_decile;
run;

/**proc sort data=asie.qf6_full_mention;
by year month alt_var;
run;

proc rank groups=3 data=asie.qf6_full_mention out=asie.qf6_full_mention_a;
var alt_var; 
ranks gr_decile;
by year month;
run;**/

data asie.qf6_full_d;
set asie.qf6_full_no_mention asie.qf6_full_mention_a;
run;

/** Calculating average excess returns **/

proc sort data=asie.qf6_full_d;
by year month r_var gr_decile;
run;

proc means data=asie.qf6_full_d noprint;
by year month r_var gr_decile;
var ex_ret/weight=l_size_x; 
output out=asie.qf8 mean=mean_exret;
run;

/** Adding the factors **/

data asie.qf8a;
set asie.qf8;
if _freq_ ge 5;
run;

proc sql;
  create table asie.qf9 as
  select a.*,b.*
    from asie.qf8a as a, asie.factorsmonthly2017 as b
    where a.year=b.year
    and a.month=b.month;
 run;

data asie.qf9a;
set asie.qf9;
if year ge 1994;
run;

proc sort data=asie.qf9a;
by year month r_var gr_decile;
run;

data asie.qf9b;
set asie.qf9a;
by year month r_var gr_decile;
if first.month then do x=0;
end;
x+1;
run;

proc sort data=asie.qf9b;
by descending year descending month descending x;
run;

data asie.qf9c;
set asie.qf9b;
by descending year descending month descending x;
if first.month then do y=x;
end;
y+0;
run;

proc sort data=asie.qf9c;
by year month gr_decile;
run;

data asie.qf10;
set asie.qf9c;
if y=12;
run;

proc sort data=asie.qf10; 
by gr_decile year month;
run;

proc means data=asie.qf10 noprint;
by gr_decile;
var _freq_; 
output out=asie.qf10_port mean=firm_port; /** this is the average portfolio size **/
proc print;
run;

proc sort data=asie.qf10; 
by r_var gr_decile year month;
run;

/** mean of the return across the an_var quintiles **/

proc sort data=asie.qf10;
by year month gr_decile;
run;

proc means data=asie.qf10 noprint;
by year month gr_decile;
var year month mean_exret emkt smb hml rmw cma mom;
output out=asie.qf10a mean=year month mean_exret emkt smb hml rmw cma mom;
run;

proc sort data=asie.qf10; 
by gr_decile year month;
run;

proc reg data=asie.qf10;
by gr_decile;
model mean_exret=;
run;

proc reg data=asie.qf10;
by gr_decile;
model mean_exret=emkt smb hml rmw cma mom;
run;

/** Mean return for the difference **/

proc sort data=asie.qf10a;
by year month gr_decile;
run;

/** high mention vs no mention **/

data asie.qf11; 
set asie.qf10a;
lag3_grd=lag3(gr_decile);
lag3_meanf=lag3(mean_exret);
lag3year=lag3(year);
if gr_decile=2 and lag3_grd=-1 and lag3year=year then
p_ret=mean_exret-lag3_meanf;
if gr_decile=2; 
run;

/** high mention vs low mention **/

data asie.qf11; 
set asie.qf10a;
lag2_grd=lag2(gr_decile);
lag2_meanf=lag2(mean_exret);
lag2year=lag2(year);
if gr_decile=2 and lag2_grd=0 and lag2year=year then
p_ret=mean_exret-lag2_meanf;
if gr_decile=2; 
run;

proc sort data=asie.qf11;
by year month;
run;

proc reg data=asie.qf11;
model p_ret=;
run;
quit;

proc reg data=asie.qf11;
model p_ret=emkt smb hml rmw cma mom;
run;
quit;


/** mean of the return of r_var/gr_decile portfolios **/

proc sort data=asie.qf10; 
by r_var gr_decile year month;
run;

proc reg data=asie.qf10;
by r_var gr_decile;
model mean_exret=;
run;

proc reg data=asie.qf10;
by r_var gr_decile;
model mean_exret=emkt smb hml rmw cma mom;
run;

/** mean return on each r_var quintile **/

proc sort data=asie.qf10;
by year month r_var gr_decile;
run;

/** high mention vs no mention **/

data asie.qf11; 
set asie.qf10;
lag3_grd=lag3(gr_decile);
lag3_meanf=lag3(mean_exret);
lag3year=lag3(year);
if gr_decile=2 and lag3_grd=-1 and lag3year=year then
p_ret=mean_exret-lag3_meanf;
if gr_decile=2; 
run;

/** high mention vs low mention **/

data asie.qf11; 
set asie.qf10;
lag2_grd=lag2(gr_decile);
lag2_meanf=lag2(mean_exret);
lag2year=lag2(year);
if gr_decile=2 and lag2_grd=0 and lag2year=year then
p_ret=mean_exret-lag2_meanf;
if gr_decile=2; 
run;

proc sort data=asie.qf11;
by r_var;
run;

proc reg data=asie.qf11;
by r_var;
model p_ret=;
run;
quit;

proc reg data=asie.qf11;
by r_var;
model p_ret=emkt smb hml rmw cma mom;
run;
quit;


/** Time series returns and alphas **/

proc sort data=asie.qf11; 
by year month;
run;

data asie.pr1; /** this is for high minus no **/
set asie.qf11; 
FF6_alpha=p_ret-(0.08834*emkt-0.03873*smb-0.42272*hml-0.30598*RMW-0.00964*CMA-0.15981*MOM);
keep year month emkt p_ret FF6_alpha;
proc print data=asie.pr1;
run;

data asie.pr1; /** this is for high minus low **/
set asie.qf11; 
FF6_alpha=p_ret-(-0.00009699*emkt-0.10010*smb-0.27733*hml-0.10244*RMW-0.00671*CMA-0.11427*MOM);
keep year month emkt p_ret FF6_alpha;
proc print data=asie.pr1;
run;

ods excel file='/sas files/aaa.xlsx';
proc print data=asie.pr1;
run;
ods excel close;


/***************************************************/
/** FIGURE 3 
/***************************************************/

/** leave each GIC firms at a time **/

data asie.sector_robust1;
set asie.firm_characteristics;
keep ticker year month gic;
if GIC=10; /** GIC goes from 10 to 60 by units of 5 **/
proc print;
run;

/** merge with the connection file **/

data asie.connections1;
set asie.sec_competition_new;
byear=year(fiscalmonth);
bmonth=month(fiscalmonth);
if filingticker='GOOG' then filingticker='GOOGL';
if mentionedticker='GOOG' then mentionedticker='GOOGL';
if filingticker='UA' then filingticker='UAA';
if mentionedticker='UA' then mentionedticker='UAA';
if filingticker='CENT' then filingticker='CENTA';
if mentionedticker='CENT' then mentionedticker='CENTA';
keep filingticker mentionedticker byear bmonth;
if mentionedticker ne '';
keep filingticker mentionedticker byear bmonth;
proc print;
run;

proc sort data=asie.connections1 nodupkey;
by filingticker mentionedticker byear bmonth;
run;

data asie.connections1a;
set asie.connections1;
if byear ge 1995 and byear le 2017;
run;

proc sql;
  create table asie.sector_connections2 as
  select a.*,b.*
    from asie.connections1a as a left join asie.sector_robust1 as b
    on a.filingticker=b.ticker 
    and a.byear=b.year
	and a.bmonth=b.month;
 run;

data asie.sector_connections3;
set asie.sector_connections2;
if ticker ne '';
proc print data=asie.sector_connections3;
run;

data asie.mentioned_by_sector;
set asie.sector_connections3;
if ticker ne '';
mentioned_by_sector=1;
keep mentionedticker byear mentioned_by_sector gic;
proc print data=asie.mentioned_by_sector;
run;

proc sort data=asie.mentioned_by_sector nodupkey;
by mentionedticker byear;
proc print;
run;

data asie.sector;
set asie.sector_robust1;
sector=1;
keep ticker year sector gic;
if month=12;
proc print data=asie.sector;
run;

data asie.main_file3_sector;
set asie.simple_count7i;
run;

proc sql;
  create table asie.main_file3_sector1 as  
  select a.*,b.*
    from asie.main_file3_sector as a left join asie.sector as b
    on a.mentionedticker=b.ticker 
    and a.year=b.year;
 run;

proc sql;
  create table asie.main_file3_sector_10 as
  select a.*,b.*
    from asie.main_file3_sector1 as a left join asie.mentioned_by_sector as b
    on a.mentionedticker=b.mentionedticker 
    and a.year=b.byear;
 run;

data asie.simple_count7ia;
set asie.main_file3_sector_10; /** change 10 to 60 by unit of 5 **/
if mentioned_by_sector ne 1 and sector ne 1; /** this is for excluding all firms from specific sectors and all the firms they mention **/
run;

***************************************************************
* Single sorted portfolios
***************************************************************;

data asie.simple_count7j;
set asie.simple_count7ia;
alt_var=l3simple_count; 
if alt_var ne .;
run;

/** sorting mentions into 0,1,2,and 3+ groups **/

data asie.qf6_full_no_mention;
set asie.simple_count7j;
if l3simple_count=0;
gr_decile=-1;
run;

data asie.qf6_full_mention;
set asie.simple_count7j;
if l3simple_count>0;
if l_size ne .;
run;

proc freq data=asie.qf6_full_mention;
tables l3simple_count;
run;

data asie.qf6_full_mention_a;
set asie.qf6_full_mention;
if l3simple_count=1 then do;
gr_decile=0;
end;
else do;
if l3simple_count=2 then do;
gr_decile=1;
end;
else do;
gr_decile=2;
end;
end;
run;

proc freq data=asie.qf6_full_mention_a;
tables gr_decile;
run;

/**proc sort data=asie.qf6_full_mention;
by year month alt_var;
run;

proc rank groups=3 data=asie.qf6_full_mention out=asie.qf6_full_mention_a;
var alt_var; 
ranks gr_decile;
by year month;
run;**/

data asie.qf6_full_d;
set asie.qf6_full_no_mention asie.qf6_full_mention_a;
run;

/** Calculating average excess returns **/

data asie.qf7a;
set asie.qf6_full_d;
/**if l3simple_count > 0;**/ /** use this condition for only mentioned firms **/
run;

proc sort data=asie.qf7a;
by year month gr_decile;
run;

proc means data=asie.qf7a noprint;
by year month gr_decile;
var ex_ret/weight=l_size_x; /** change ex_ret to ex_ret_size for size-decile adjusted return **/
output out=asie.qf8 mean=mean_exret;
run;

/** Adding the factors **/

data asie.qf8a;
set asie.qf8;
if _freq_ ge 5;
run;

proc sql;
  create table asie.qf9 as
  select a.*,b.*
    from asie.qf8a as a, asie.factorsmonthly2017 as b
    where a.year=b.year
    and a.month=b.month;
 run;

data asie.qf9a;
set asie.qf9;
if year ge 1994;
run;

proc sort data=asie.qf9a;
by year month gr_decile;
run;

data asie.qf9b;
set asie.qf9a;
by year month gr_decile;
if first.month then do x=0;
end;
x+1;
run;

proc sort data=asie.qf9b;
by descending year descending month descending x;
run;

data asie.qf9c;
set asie.qf9b;
by descending year descending month descending x;
if first.month then do y=x;
end;
y+0;
run;

proc sort data=asie.qf9c;
by year month gr_decile;
run;

data asie.qf10;
set asie.qf9c;
if y=4;
run;

proc sort data=asie.qf10; 
by gr_decile year month;
run;

proc means data=asie.qf10 noprint; /** this is the average portfolio size **/
by gr_decile;
var _freq_; 
output out=asie.qf10_port mean=firm_port;
proc print;
run;

proc sort data=asie.qf10; 
by gr_decile year month;
run;

proc reg data=asie.qf10;
by gr_decile;
model mean_exret=;
run;

/**proc reg data=asie.qf10;
by gr_decile;
model mean_exret=emkt;
run;

proc reg data=asie.qf10;
by gr_decile;
model mean_exret=emkt smb hml;
run;

proc reg data=asie.qf10;
by gr_decile;
model mean_exret=emkt smb hml mom;
run;

proc reg data=asie.qf10;
by gr_decile;
model mean_exret=emkt smb hml rmw cma;
run;**/

proc reg data=asie.qf10;
by gr_decile;
model mean_exret=emkt smb hml rmw cma mom;
run;


/** Mean return for the difference **/

proc sort data=asie.qf10;
by year month gr_decile;
run;

/** high mention vs no mention **/

data asie.qf11; /** this is for portfolio 5-0 **/
set asie.qf10;
lag3_grd=lag3(gr_decile);
lag3_meanf=lag3(mean_exret);
lag3year=lag3(year);
if gr_decile=2 and lag3_grd=-1 and lag3year=year then
p_ret=mean_exret-lag3_meanf;
if gr_decile=2; 
run;

/** high mention vs low mention **/

data asie.qf11; /** this is for portfolio 5-0 **/
set asie.qf10;
lag2_grd=lag2(gr_decile);
lag2_meanf=lag2(mean_exret);
lag2year=lag2(year);
if gr_decile=2 and lag2_grd=0 and lag2year=year then
p_ret=mean_exret-lag2_meanf;
if gr_decile=2; 
run;

proc sort data=asie.qf11;
by year month;
run;

proc reg data=asie.qf11;
model p_ret=;
run;

/**proc reg data=asie.qf11;
model p_ret=emkt;
run;

proc reg data=asie.qf11;
model p_ret=emkt smb hml;
run;

proc reg data=asie.qf11;
model p_ret=emkt smb hml mom;
run;

proc reg data=asie.qf11;
model p_ret=emkt smb hml rmw cma;
run;**/

proc reg data=asie.qf11;
model p_ret=emkt smb hml rmw cma mom;
run;


/***************************************************/
/** TABLE 4 
/***************************************************/

data asie.simple_count7a;
set asie.simple_count7_all; /** this file is from the code 'Code_SimpleCount_Base' **/
keep mentionedticker year month l3simple_count l3simple_mean_size l3simple_sum_size l3simple_size_count;
run;

/** merge with firm characteristics **/

proc sql;
  create table asie.simple_count7b as
  select a.*,b.*
    from asie.firm_characteristics as a left join asie.simple_count7a as b
    on a.tic=b.mentionedticker
    and a.year=b.year 
    and a.month=b.month;
 run;

data asie.simple_count7c;
set asie.simple_count7b;
if mentionedticker = '' then l3simple_count=0;
if mentionedticker = '' then l3simple_mean_size=.;
if mentionedticker = '' then l3simple_sum_size=.;
if mentionedticker = '' then l3simple_size_count=.;
run;

/** portfolio sort **/

proc univariate data=asie.simple_count7c;
var l3simple_count l3simple_mean_size l3simple_sum_size;
run;

proc sql;
  create table asie.simple_count7f as
  select a.*,b.*
    from asie.simple_count7c as a left join asie.size_breakpoints_new as b 
    on a.year=b.year 
    and a.month=b.month;
 run;

data asie.simple_count7g;
set asie.simple_count7f;
if l_size ne .;
l_size_x=l_size;
drop gr_decile;
run;

proc sort data=asie.simple_count7g;
by year month l_size;
run;

data asie.simple_count7h;
set asie.simple_count7g;
if l_size/1000 le p10 then do;
r_var=0;
end;
else do;
if l_size/1000 le p20 then do;
r_var=1;
end;
else do;
if l_size/1000 le p30 then do;
r_var=2;
end;
else do;
if l_size/1000 le p40 then do;
r_var=3;
end;
else do;
if l_size/1000 le p50 then do;
r_var=4;
end;
else do;
if l_size/1000 le p60 then do;
r_var=5;
end;
else do;
if l_size/1000 le p70 then do;
r_var=6;
end;
else do;
if l_size/1000 le p80 then do;
r_var=7;
end;
else do;
if l_size/1000 le p90 then do;
r_var=8;
end;
else do;
r_var=9;
end;
end;
end;
end;
end;
end;
end;
end;
end;
run;

proc freq data=asie.simple_count7h;
tables r_var;
run;

data asie.simple_count7i;
set asie.simple_count7h;
if abs(prc) ge 5;
if r_var ge 2;
run;

data asie.simple_count7j;
set asie.simple_count7i;
if mentionedticker = '' then l3simple_count=0;
if mentionedticker = '' then l3simple_mean_size=0;
if mentionedticker = '' then l3simple_sum_size=0;
if mentionedticker = '' then l3simple_max_size=0;
if mentionedticker = '' then l3simple_size_count=0;
run;


/***************************/
/** Single sort **/
/***************************/

proc univariate data=asie.simple_count7j;
var l3simple_count l3simple_mean_size l3simple_sum_size;
run;

proc freq data=asie.simple_count7j;
tables l3simple_count;
run;

data asie.simple_count7k;
set asie.simple_count7j;
alt_var=l3simple_count; 
if alt_var ne .;
run;

/** sorting mentions into 0,1,2,and 3+ groups **/

data asie.qf6_full_no_mention;
set asie.simple_count7k;
if l3simple_count=0;
gr_decile=-1;
run;

data asie.qf6_full_mention;
set asie.simple_count7k;
if l3simple_count>0;
if l3simple_mean_size>l_size; /** this is the admiring/targeting classification **/
if l3simple_mean_size ne .;
if l_size ne .;
run;

proc freq data=asie.qf6_full_mention;
tables l3simple_count;
run;

data asie.qf6_full_mention_a;
set asie.qf6_full_mention;
if l3simple_count=1 then do;
gr_decile=0;
end;
else do;
if l3simple_count=2 then do;
gr_decile=1;
end;
else do;
gr_decile=2;
end;
end;
run;

proc freq data=asie.qf6_full_mention_a;
tables gr_decile;
run;

/**proc sort data=asie.qf6_full_mention;
by year month alt_var;
run;

proc rank groups=3 data=asie.qf6_full_mention out=asie.qf6_full_mention_a;
var alt_var; 
ranks gr_decile;
by year month;
run;**/

data asie.qf6_full_d;
set asie.qf6_full_no_mention asie.qf6_full_mention_a;
run;

/** Calculating average excess returns **/

data asie.qf7a;
set asie.qf6_full_d;
/**if l3simple_count > 0;**/ /** use this condition for only mentioned firms **/
run;

proc sort data=asie.qf7a;
by year month gr_decile;
run;

proc means data=asie.qf7a noprint;
by year month gr_decile;
var ex_ret/weight=l_size_x; 
output out=asie.qf8 mean=mean_exret;
run;

/** Adding the factors **/

data asie.qf8a;
set asie.qf8;
if _freq_ ge 5;
run;

proc sql;
  create table asie.qf9 as
  select a.*,b.*
    from asie.qf8a as a, asie.factorsmonthly2017 as b
    where a.year=b.year
    and a.month=b.month;
 run;

data asie.qf9a;
set asie.qf9;
if year ge 1994;
run;

proc sort data=asie.qf9a;
by year month gr_decile;
run;

data asie.qf9b;
set asie.qf9a;
by year month gr_decile;
if first.month then do x=0;
end;
x+1;
run;

proc sort data=asie.qf9b;
by descending year descending month descending x;
run;

data asie.qf9c;
set asie.qf9b;
by descending year descending month descending x;
if first.month then do y=x;
end;
y+0;
run;

proc sort data=asie.qf9c;
by year month gr_decile;
run;

data asie.qf10;
set asie.qf9c;
if y=4;
run;

proc sort data=asie.qf10; 
by gr_decile year month;
run;

proc means data=asie.qf10 noprint; /** this is the average portfolio size **/
by gr_decile;
var _freq_; 
output out=asie.qf10_port mean=firm_port;
proc print;
run;

proc sort data=asie.qf10; 
by gr_decile year month;
run;

proc reg data=asie.qf10;
by gr_decile;
model mean_exret=;
run;

/**proc reg data=asie.qf10;
by gr_decile;
model mean_exret=emkt;
run;

proc reg data=asie.qf10;
by gr_decile;
model mean_exret=emkt smb hml;
run;

proc reg data=asie.qf10;
by gr_decile;
model mean_exret=emkt smb hml mom;
run;

proc reg data=asie.qf10;
by gr_decile;
model mean_exret=emkt smb hml rmw cma;
run;**/

proc reg data=asie.qf10;
by gr_decile;
model mean_exret=emkt smb hml rmw cma mom;
run;


/** Mean return for the difference **/

proc sort data=asie.qf10;
by year month gr_decile;
run;

/** high mention vs no mention **/

data asie.qf11; /** this is for portfolio 5-0 **/
set asie.qf10;
lag3_grd=lag3(gr_decile);
lag3_meanf=lag3(mean_exret);
lag3year=lag3(year);
if gr_decile=2 and lag3_grd=-1 and lag3year=year then
p_ret=mean_exret-lag3_meanf;
if gr_decile=2; 
run;

/** high mention vs low mention **/

data asie.qf11; /** this is for portfolio 5-0 **/
set asie.qf10;
lag2_grd=lag2(gr_decile);
lag2_meanf=lag2(mean_exret);
lag2year=lag2(year);
if gr_decile=2 and lag2_grd=0 and lag2year=year then
p_ret=mean_exret-lag2_meanf;
if gr_decile=2; 
run;

proc sort data=asie.qf11;
by year month;
run;

proc reg data=asie.qf11;
model p_ret=;
run;

/**proc reg data=asie.qf11;
model p_ret=emkt;
run;

proc reg data=asie.qf11;
model p_ret=emkt smb hml;
run;

proc reg data=asie.qf11;
model p_ret=emkt smb hml mom;
run;

proc reg data=asie.qf11;
model p_ret=emkt smb hml rmw cma;
run;**/

proc reg data=asie.qf11;
model p_ret=emkt smb hml rmw cma mom;
run;


/***************************************************/
/** TABLE 5 
/***************************************************/

data asie.simple_count7a;
set asie.simple_count7_all; 
keep mentionedticker year month l3simple_count l3simple_mean_size l3simple_sum_size l3simple_size_count;
run;

/** merge with firm characteristics **/

proc sql;
  create table asie.simple_count7b as
  select a.*,b.*
    from asie.firm_characteristics as a left join asie.simple_count7a as b
    on a.tic=b.mentionedticker
    and a.year=b.year 
    and a.month=b.month;
 run;

data asie.simple_count7c;
set asie.simple_count7b;
if mentionedticker = '' then l3simple_count=0;
if mentionedticker = '' then l3simple_mean_size=.;
if mentionedticker = '' then l3simple_sum_size=.;
if mentionedticker = '' then l3simple_size_count=.;
run;

/** remove microcap stocks **/

proc univariate data=asie.simple_count7c;
var l3simple_count l3simple_mean_size l3simple_sum_size;
run;

proc sql;
  create table asie.simple_count7f as
  select a.*,b.*
    from asie.simple_count7c as a left join asie.size_breakpoints_new as b 
    on a.year=b.year 
    and a.month=b.month;
 run;

data asie.simple_count7g;
set asie.simple_count7f;
if l_size ne .;
l_size_x=l_size;
drop gr_decile;
run;

proc sort data=asie.simple_count7g;
by year month l_size;
run;

data asie.simple_count7h;
set asie.simple_count7g;
if l_size/1000 le p10 then do;
r_var=0;
end;
else do;
if l_size/1000 le p20 then do;
r_var=1;
end;
else do;
if l_size/1000 le p30 then do;
r_var=2;
end;
else do;
if l_size/1000 le p40 then do;
r_var=3;
end;
else do;
if l_size/1000 le p50 then do;
r_var=4;
end;
else do;
if l_size/1000 le p60 then do;
r_var=5;
end;
else do;
if l_size/1000 le p70 then do;
r_var=6;
end;
else do;
if l_size/1000 le p80 then do;
r_var=7;
end;
else do;
if l_size/1000 le p90 then do;
r_var=8;
end;
else do;
r_var=9;
end;
end;
end;
end;
end;
end;
end;
end;
end;
run;

proc freq data=asie.simple_count7h;
tables r_var;
run;

data asie.simple_count7i;
set asie.simple_count7h;
if abs(prc) ge 5;
if r_var ge 2;
if mentionedticker = '' then l3simple_count=0;
if mentionedticker = '' then l3simple_mean_size=0;
if mentionedticker = '' then l3simple_sum_size=0;
if mentionedticker = '' then l3simple_max_size=0;
if mentionedticker = '' then l3simple_size_count=0;
run;

proc univariate data=asie.simple_count7i; 
var ex_ret l3simple_count l_Size Mtbq l_Past6_ret Profitability3 Investment3 l_Beta l_Idiovol;
run;


*----------------------winsorizing outliers-----------------------------------------------------;
%macro outliers();

%let vars=  l_Size Mtbq l_Past6_ret Profitability3 Investment3 l_Beta l_Idiovol;

%let cvar=%scan(&vars,1);
%let pos=1;

%do %while (&cvar ne );

%put &cvar;

proc sort data=asie.simple_count7i; by year month; run;

proc means data=asie.simple_count7i min p1 p99 max median mean n noprint;
by year month;
var &cvar;
output out = asie.outliers 
p1 (&cvar) = p1_&cvar
p99 (&cvar) = p99_&cvar;
run;

proc sql;
create table asie.temp as select a.*, b.p1_&cvar, b.p99_&cvar
from asie.simple_count7i as a left join asie.outliers as b
on a.year=b.year and a.month=b.month;
quit;

data asie.simple_count7i; set asie.temp;
if &cvar<p1_&cvar and &cvar ne . and p1_&cvar ne . then &cvar=p1_&cvar;
if &cvar>p99_&cvar and &cvar ne . and p99_&cvar ne . then &cvar=p99_&cvar;
drop p1_&cvar p99_&cvar;
run;

%let pos=%eval(&pos+1);
%let cvar=%scan(&vars,&pos);

%end;

%mend outliers;


%outliers;

proc sort data=asie.simple_count7i; by permno year month; run;

data asie.simple_count7j; set asie.simple_count7i; run;

/** full sample **/
data asie.simple_count7ja;
set asie.simple_count7j;
/**if r_var le 4;
if r_var ge 5;**/ 
run;

/** targeting **/
data asie.simple_count7ja;
set asie.simple_count7j;
if l_size ne .;
if l3simple_mean_size = . then l3simple_count = 0; 
if l3simple_mean_size<l_size then l3simple_count = 0;
/**if r_var le 4;
if r_var ge 5;**/  
run;

/** admiring **/
data asie.simple_count7ja;
set asie.simple_count7j; 
if l_size ne .;
if l3simple_mean_size = . then l3simple_count = 0;  
if l3simple_mean_size>l_size then l3simple_count = 0;
/**if r_var le 4;
if r_var ge 5;**/  
run;


data asie.simple_count7jb;
set asie.simple_count7ja; 
/**if l3simple_count > 0;**/ /** this to exclude firms that are not mentioned at all **/
ex_ret100=ex_ret*100;
log_size=log(l_Size);
if Mtbq le 0 and Mtbq ne . then Mtbq=0.01;
if Mtbq ge 30 and Mtbq ne . then Mtbq=30;
log_Mtbq=log(Mtbq);
if l3simple_count ne .;
if l3simple_count = 0 then do;
no_mention=1;
end;
else do;
no_mention=0;
end;
run;

data asie.simple_count7jc;
set asie.simple_count7jb;
l_sc=log(1+l3simple_count); 
sc_no_mention=l_sc*no_mention;
size_no_mention=log_size*no_mention;
run;

proc sort data=asie.simple_count7jc;
by year month;
run;

ods listing close;
ods output parameterestimates=pe fitstatistics=adjrsq;
proc reg data=asie.simple_count7jc;
 by year month;
 model ex_ret100=l_sc no_mention size_no_mention/**target sc_target**/ log_size log_Mtbq l_Past6_ret Profitability3 Investment3 l_Beta l_Idiovol;
run;
quit;
ods listing;

proc means data=pe mean std t probt;
 var estimate; class variable;
run;

/** Newey-West correction **/

proc sort data=pe; by variable; run;

%let lags=12;
ods output parameterestimates=nw;
ods listing close;
proc model data=pe;
 by variable;
 instruments / intonly;
 estimate=a;
 fit estimate / gmm kernel=(bart,%eval(&lags+1),0); run;
quit;
ods listing;

proc print data=nw; id variable;
 var estimate--df; format estimate stderr 7.4;
run;

/** number of observations **/

data asie.obs1;
set asie.simple_count7jc;
if ex_ret100 ne .;
if l_sc ne .;
if no_mention ne .;
if size_no_mention ne .; 
if log_size ne .;
if log_Mtbq ne .;
if l_Past6_ret ne .;
if Profitability3 ne .;
if Investment3 ne .;
if l_Beta ne .;
if l_Idiovol;
run;

/** mean r-sqaure **/

data asie.rsq;
set adjrsq;
if label2='R-Square';
proc print;
run;

proc means data=asie.rsq noprint;
var nValue2;
output out=asie.rsq_mean mean=rsq_mean;
proc print;
run;


/***************************************************/
/** TABLE 6 / TABLE 11
/***************************************************/

/** get the simple mention count files from the codes 'Code_SimpleCount_Base' and 'Code_SimpleCount_ff48' **/

data asie.simple_count7a;
set asie.simple_count7_all; /** change simple_count7_all to simple_count7_inter and simple_count7_in 
                                                     and simple_count7_inter_ff48 and simple_count7_in_ff48 **/
keep mentionedticker year month l3simple_count l3simple_mean_size l3simple_sum_size l3simple_max_size l3simple_min_size l3simple_size_count;
run;

proc sort data=asie.simple_count7a;
by year month;
run;

proc means data=asie.simple_count7a noprint;
by year month;
var l3simple_sum_size; 
output out=asie.simple_count7a_median mean=max_size_median;
proc print data=asie.simple_count7a_median;
run;

/** merge with firm characteristics **/

proc sql;
  create table asie.simple_count7b as
  select a.*,b.*
    from asie.firm_characteristics as a left join asie.simple_count7a as b
    on a.tic=b.mentionedticker
    and a.year=b.year 
    and a.month=b.month;
 run;

data asie.simple_count7c;
set asie.simple_count7b;
if mentionedticker = '' then l3simple_count=0;
if mentionedticker = '' then l3simple_mean_size=0;
if mentionedticker = '' then l3simple_sum_size=0;
if mentionedticker = '' then l3simple_max_size=0;
if mentionedticker = '' then l3simple_size_count=0;
run;


/** remove microcap cap stocks **/

proc univariate data=asie.simple_count7c;
var l3simple_count l3simple_mean_size l3simple_sum_size;
run;

proc sql;
  create table asie.simple_count7f as
  select a.*,b.*
    from asie.simple_count7c as a left join asie.size_breakpoints_new as b 
    on a.year=b.year 
    and a.month=b.month;
 run;

data asie.simple_count7g;
set asie.simple_count7f;
if l_size ne .;
l_size_x=l_size;
run;

proc sort data=asie.simple_count7g;
by year month l_size;
run;

data asie.simple_count7h;
set asie.simple_count7g;
if l_size/1000 le p10 then do;
r_var=0;
end;
else do;
if l_size/1000 le p20 then do;
r_var=1;
end;
else do;
if l_size/1000 le p30 then do;
r_var=2;
end;
else do;
if l_size/1000 le p40 then do;
r_var=3;
end;
else do;
if l_size/1000 le p50 then do;
r_var=4;
end;
else do;
if l_size/1000 le p60 then do;
r_var=5;
end;
else do;
if l_size/1000 le p70 then do;
r_var=6;
end;
else do;
if l_size/1000 le p80 then do;
r_var=7;
end;
else do;
if l_size/1000 le p90 then do;
r_var=8;
end;
else do;
r_var=9;
end;
end;
end;
end;
end;
end;
end;
end;
end;
run;

proc freq data=asie.simple_count7h;
tables r_var;
run;

data asie.simple_count7i;
set asie.simple_count7h;
if abs(prc) ge 5;
if r_var ge 2;
run;

/*****************************************************************************/
/** portfolios sort
/*****************************************************************************/

/** Calculating average excess returns **/

data asie.qf7b;
set asie.simple_count7i;
if l3simple_count > 0;
if l3simple_mean_size>l_size; /** this is the admiring/targeting classification, and remove this line for all mentions **/
if l3simple_mean_size ne .;
if l_size ne .;
run;

proc sort data=asie.qf7b;
by year month;
run;

proc means data=asie.qf7b noprint;
by year month;
var ex_ret/weight=l_size; 
output out=asie.qf8 mean=mean_exret;
run;

/** Adding the factors **/

data asie.qf8a;
set asie.qf8;
if _freq_ ge 5;
run;

proc means data=asie.qf8a noprint; /** this is the average portfolio size **/
var _freq_; 
output out=asie.qf8a_port mean=firm_port;
proc print;
run;

proc sql;
  create table asie.qf9 as
  select a.*,b.*
    from asie.qf8a as a, asie.factorsmonthly2017 as b
    where a.year=b.year
    and a.month=b.month;
 run;

data asie.qf10;
set asie.qf9;
if year ge 1994;
run;

proc sort data=asie.qf10;
by year month;
run;

proc reg data=asie.qf10;
model mean_exret=emkt smb hml rmw cma mom;
run;
quit;


/** This is for only non-mentioned firms **/

data asie.qf7b; 
set asie.simple_count7i; 
if l3simple_count = 0 ;  
run;

proc sort data=asie.qf7b;
by year month;
run;

proc means data=asie.qf7b noprint;
by year month;
var ex_ret/weight=l_size; 
output out=asie.qf8 mean=mean_exret;
run;

/** Adding the factors **/

data asie.qf8a;
set asie.qf8;
if _freq_ ge 5;
run;

proc means data=asie.qf8a noprint; /** this is the average portfolio size **/
var _freq_; 
output out=asie.qf8a_port mean=firm_port;
proc print;
run;

proc sql;
  create table asie.qf9 as
  select a.*,b.*
    from asie.qf8a as a, asie.factorsmonthly2017 as b
    where a.year=b.year
    and a.month=b.month;
 run;

data asie.qf10;
set asie.qf9;
if year ge 1994;
run;

proc sort data=asie.qf10;
by year month;
run;

proc reg data=asie.qf10;
model mean_exret=emkt smb hml rmw cma mom;
run;
quit;


/***************************************************/
/** TABLE 7 
/***************************************************/

%let wrds = wrds.wharton.upenn.edu 4016; 
options comamid=TCP remote=WRDS;
signon username=_prompt_;

/** get ROE forecasts **/

rsubmit;
data det_XEPSUS; 
set ibes.DET_XEPSUS; 
run;

rsubmit;
data DET_XEPSUS2;
set DET_XEPSUS;
year_rev=year(REVDATS);
month_rev=month(REVDATS);
keep TICKER ANALYS REVDATS year_rev month_rev fpi;
if year_rev ge 1994;
run;

rsubmit;
proc sort data=DET_XEPSUS2 nodupkey;
by analys year_rev month_rev ticker;
run; 

rsubmit;
proc contents data=DET_XEPSUS2;
run;

libname asie 'c:\sas files';
run;

rsubmit;
proc upload data=asie.iclink_new out=iclink_new; 
run;

rsubmit;
proc sql; 
create table DET_XEPSUS3 
as select a.*, b.*
from DET_XEPSUS2 as a left join iclink_new as b
on a.ticker=b.ticker;
run;

rsubmit;
proc sort data=DET_XEPSUS3 nodupkey;
by analys year_rev month_rev ticker;
run; 

rsubmit;
proc download data=DET_XEPSUS3 out=asie.ibes_analyst; 
run;

/** add GIC industry **/

data asie.ibes_analyst1;
set asie.ibes_analyst;
keep analys permno ticker year_rev month_rev;
run;

/**data asie.main_file3_gic;
set asie.main_file3;
keep ticker year month gic;
run;

proc sql;
  create table asie.ibes_analyst2 as
  select a.*,b.*
    from asie.ibes_analyst1 as a left join asie.main_file3_gic as b
    on a.ticker=b.ticker 
    and a.year_rev=b.year
	and a.month_rev=b.month;
 run;**/

proc sql;
  create table asie.ibes_analyst2 as
  select a.*,b.*
    from asie.ibes_analyst1 as a left join asie.firm_gic as b
    on a.ticker=b.ticker 
    and a.year_rev=b.fyear
	and a.month_rev=b.fmonth;
 run;

proc sort data=asie.ibes_analyst2 nodupkey;
by analys year_rev month_rev ticker;
run; 

data asie.ibes_analyst3;
set asie.ibes_analyst2;
if year_rev ge 1995 and year_rev le 2017;
run;

data asie.ibes_analyst4;
set asie.ibes_analyst3;
if gic ne .;
run;

data asie.ibes_analyst5;
set asie.ibes_analyst4;
run;

proc sort data=asie.ibes_analyst5 nodupkey;
by analys year_rev ticker;
run; 

proc sort data=asie.ibes_analyst5;
by analys year_rev gic;
run; 

proc means data=asie.ibes_analyst5 noprint;
by analys year_rev;
var gic;
output out=asie.analyst_count_gic n=analyst_count mean=m_gic std=s_gic;
proc print;
run;

data asie.analyst_count_gic1;
set asie.analyst_count_gic;
if _freq_ = 1 then s_gic=0;
proc print;
run;

proc sql;
  create table asie.ibes_analyst6 as
  select a.*,b.*
    from asie.ibes_analyst5 as a left join asie.analyst_count_gic1 as b
    on a.analys=b.analys 
    and a.year_rev=b.year_rev;
 run;

data asie.ibes_analyst7;
set asie.ibes_analyst6;
/**if mean_out_sector_pct ne .;**/
if gic ne .;
if s_gic ne .;
run;

data asie.ibes_analyst8;
set asie.ibes_analyst7;
if s_gic = 0 then do;
one_gic=1;
end;
else do;
one_gic=0;
end;
run; 

proc sort data=asie.ibes_analyst8 nodupkey;
by ticker year_rev analys;
run;
 
proc means data=asie.ibes_analyst8 noprint;
by ticker year_rev;
var one_gic;
output out=asie.ibes_analyst9 mean=mean_one_gic; /** this file is the proportion of analysts that cover only their GIC **/
proc print;
run;

proc sql;
  create table asie.qf6_full_d_analyst as
  select a.*,b.*
    from asie.qf6_full_d as a left join asie.ibes_analyst9 as b /** 'qf6_full_d' is from TABLE 3 code **/
    on a.mentionedticker=b.ticker 
    and a.year=b.year_rev;
 run;

/** get the proportion of analysts that cover the GIC of the mentioning firms **/

/** get the GIC of the mentioned and the mentioning firms from the code 'Code_SimpleCount_Base' **/

data asie.mentioned_mentioning_gic;
set asie.simple_count2_d;
if m_gic ne .;
if f_gic ne .;
proc print;
run;

/** add the analysts covering the mentioned and menitoning firms **/

data asie.ibes_analyst5_mentioned;
set asie.ibes_analyst5;
m_ticker=ticker;
m_analys=analys;
mm_gic=gic;
keep year_rev month_rev m_ticker m_analys mm_gic;
proc print;
run;

proc sql;
  create table asie.mentioned_mentioning_gic2 as
  select a.*,b.*
    from asie.mentioned_mentioning_gic as a left join asie.ibes_analyst5_mentioned as b
    on a.mentionedticker=b.m_ticker 
    and a.byear=b.year_rev;
 run;

data asie.mentioned_mentioning_gic3;
set asie.mentioned_mentioning_gic2;
if m_analys ne .;
run;

data asie.ibes_analyst5_mentioning;
set asie.ibes_analyst5;
f_ticker=ticker;
f_analys=analys;
ff_gic=gic;
keep year_rev month_rev f_ticker f_analys ff_gic;
proc print;
run;

proc sql;
  create table asie.mentioned_mentioning_gic4 as
  select a.*,b.*
    from asie.mentioned_mentioning_gic3 as a left join asie.ibes_analyst5_mentioning as b
    on a.filingticker=b.f_ticker 
    and a.byear=b.year_rev;
 run;

data asie.mentioned_mentioning_gic5;
set asie.mentioned_mentioning_gic4;
if f_analys ne .;
run;

/** identify the mentioned firms that have analysts in the same GIC as the metniotong firms **/

data asie.mentioned_mentioning_gic6;
set asie.mentioned_mentioning_gic5;
if m_analys=f_analys and ff_gic=m_gic then do;
same_gic=1;
end;
else do;
same_gic=0;
end;
run;

proc sort data=asie.mentioned_mentioning_gic6;
by mentionedticker byear bmonth;
run;

proc means data=asie.mentioned_mentioning_gic6 noprint;
by mentionedticker byear;
var same_gic;
output out=asie.mentioned_mentioning_gic7 mean=same_gic;
proc print;
run;

proc sql;
  create table asie.qf6_full_d_analyst2 as
  select a.*,b.*
    from asie.qf6_full_d_analyst as a left join asie.mentioned_mentioning_gic7 as b
    on a.mentionedticker=b.mentionedticker 
    and a.year=b.byear;
 run;


/***************************/
/** Single sort **/
/***************************/

/** Calculating average excess returns **/

data asie.qf7a;
set asie.qf6_full_d_analyst2;
if mean_one_gic = .; /** firms with no analyst coverage **/
/**if l3simple_count > 0;**/ /** use this condition for only mentioned firms **/
run;

data asie.qf7a;
set asie.qf6_full_d_analyst2;
if mean_one_gic ne .; /** firms with analyst coverage **/
/**if l3simple_count > 0;**/ /** use this condition for only mentioned firms **/
run;

data asie.qf7a;
set asie.qf6_full_d_analyst2;
if mean_one_gic ne .;
if mean_one_gic = 1; /** all analysts cover one GIC **/
/**if l3simple_count > 0;**/ /** use this condition for only mentioned firms **/
run;

data asie.qf7a;
set asie.qf6_full_d_analyst2;
if mean_one_gic ne .;
if mean_one_gic < 1; /** some of the analysts cover multiple GICs **/
/**if l3simple_count > 0;**/ /** use this condition for only mentioned firms **/
run;

data asie.qf7a;
set asie.qf6_full_d_analyst2;
if mean_one_gic ne .;
if mean_one_gic < 1 and same_gic > 0; /** some of the analysts cover multiple GICs and those of some of the mentioning firms**/
/**if l3simple_count > 0;**/ /** use this condition for only mentioned firms **/
run;

proc sort data=asie.qf7a;
by year month gr_decile;
run;

proc means data=asie.qf7a noprint;
by year month gr_decile;
var ex_ret/weight=l_size_x; 
output out=asie.qf8 mean=mean_exret;
run;

/** Adding the factors **/

data asie.qf8a;
set asie.qf8;
if _freq_ ge 5;
run;

proc sql;
  create table asie.qf9 as
  select a.*,b.*
    from asie.qf8a as a, asie.factorsmonthly2017 as b
    where a.year=b.year
    and a.month=b.month;
 run;

data asie.qf9a;
set asie.qf9;
if year ge 1994;
run;

proc sort data=asie.qf9a;
by year month gr_decile;
run;

data asie.qf9b;
set asie.qf9a;
by year month gr_decile;
if first.month then do x=0;
end;
x+1;
run;

proc sort data=asie.qf9b;
by descending year descending month descending x;
run;

data asie.qf9c;
set asie.qf9b;
by descending year descending month descending x;
if first.month then do y=x;
end;
y+0;
run;

proc sort data=asie.qf9c;
by year month gr_decile;
run;

data asie.qf10;
set asie.qf9c;
if y=4;
run;

proc sort data=asie.qf10; 
by gr_decile year month;
run;

proc means data=asie.qf10 noprint; /** this is the average portfolio size **/
by gr_decile;
var _freq_; 
output out=asie.qf10_port mean=firm_port;
proc print;
run;

proc sort data=asie.qf10; 
by gr_decile year month;
run;

/**proc reg data=asie.qf10;
by gr_decile;
model mean_exret=;
run;

proc reg data=asie.qf10;
by gr_decile;
model mean_exret=emkt;
run;

proc reg data=asie.qf10;
by gr_decile;
model mean_exret=emkt smb hml;
run;

proc reg data=asie.qf10;
by gr_decile;
model mean_exret=emkt smb hml mom;
run;

proc reg data=asie.qf10;
by gr_decile;
model mean_exret=emkt smb hml rmw cma;
run;**/

proc reg data=asie.qf10;
by gr_decile;
model mean_exret=emkt smb hml rmw cma mom;
run;


/** Mean return for the difference **/

proc sort data=asie.qf10;
by year month gr_decile;
run;

/** high mention vs no mention **/

data asie.qf11; /** this is for portfolio 5-0 **/
set asie.qf10;
lag3_grd=lag3(gr_decile);
lag3_meanf=lag3(mean_exret);
lag3year=lag3(year);
if gr_decile=2 and lag3_grd=-1 and lag3year=year then
p_ret=mean_exret-lag3_meanf;
if gr_decile=2; 
run;

/** high mention vs low mention **/

data asie.qf11; /** this is for portfolio 5-0 **/
set asie.qf10;
lag2_grd=lag2(gr_decile);
lag2_meanf=lag2(mean_exret);
lag2year=lag2(year);
if gr_decile=2 and lag2_grd=0 and lag2year=year then
p_ret=mean_exret-lag2_meanf;
if gr_decile=2; 
run;

proc sort data=asie.qf11;
by year month;
run;

/**proc reg data=asie.qf11;
model p_ret=;
run;

proc reg data=asie.qf11;
model p_ret=emkt;
run;

proc reg data=asie.qf11;
model p_ret=emkt smb hml;
run;

proc reg data=asie.qf11;
model p_ret=emkt smb hml mom;
run;

proc reg data=asie.qf11;
model p_ret=emkt smb hml rmw cma;
run;**/

proc reg data=asie.qf11;
model p_ret=emkt smb hml rmw cma mom;
run;


/***************************************************/
/** FIGURE 5 
/***************************************************/

data asie.simple_count7a;
set asie.simple_count7_all; /** change simple_count7_all to simple_count7_inter and simple_count7_in **/
keep mentionedticker year month l3simple_count l3simple_mean_size l3simple_sum_size l3simple_size_count;
run;

/** merge with the firm characteristics **/

proc sql;
  create table asie.simple_count7b as
  select a.*,b.*
    from asie.firm_characteristics as a left join asie.simple_count7a as b
    on a.tic=b.mentionedticker
    and a.year=b.year 
    and a.month=b.month;
 run;

data asie.simple_count7c;
set asie.simple_count7b;
if mentionedticker = '' then l3simple_count=0;
if mentionedticker = '' then l3simple_mean_size=.;
if mentionedticker = '' then l3simple_sum_size=.;
if mentionedticker = '' then l3simple_size_count=.;
run;

/** portfolio sort **/

proc univariate data=asie.simple_count7c;
var l3simple_count l3simple_mean_size l3simple_sum_size;
run;

proc sql;
  create table asie.simple_count7f as
  select a.*,b.*
    from asie.simple_count7c as a left join asie.size_breakpoints_new as b 
    on a.year=b.year 
    and a.month=b.month;
 run;

data asie.simple_count7g;
set asie.simple_count7f;
if l_size ne .;
l_size_x=l_size;
run;

proc sort data=asie.simple_count7g;
by year month l_size;
run;

data asie.simple_count7h;
set asie.simple_count7g;
if l_size/1000 le p10 then do;
r_var=0;
end;
else do;
if l_size/1000 le p20 then do;
r_var=1;
end;
else do;
if l_size/1000 le p30 then do;
r_var=2;
end;
else do;
if l_size/1000 le p40 then do;
r_var=3;
end;
else do;
if l_size/1000 le p50 then do;
r_var=4;
end;
else do;
if l_size/1000 le p60 then do;
r_var=5;
end;
else do;
if l_size/1000 le p70 then do;
r_var=6;
end;
else do;
if l_size/1000 le p80 then do;
r_var=7;
end;
else do;
if l_size/1000 le p90 then do;
r_var=8;
end;
else do;
r_var=9;
end;
end;
end;
end;
end;
end;
end;
end;
end;
run;

proc freq data=asie.simple_count7h;
tables r_var;
run;

data asie.simple_count7i;
set asie.simple_count7h;
if abs(prc) ge 5;
if r_var ge 2;
run;


***************************************************************
* Get the fundamentals
***************************************************************;

proc sql;
  create table asie.pooled_reg1 as /**'ann_fund12' is from the code 'fundamentals_annual4' **/
  select a.*,b.*
    from asie.simple_count7i as a left join asie.ann_fund12 as b 
    on a.mentionedticker=b.tic 
    and a.year=b.year
    and a.month=b.month;
 run;

proc sort data=asie.pooled_reg1 nodupkey;
by mentionedticker year month;
run;

proc univariate data=asie.pooled_reg1; 
var ex_ret l3simple_count l_Size Mtbq l_Past6_ret Profitability3 Investment3 l_Beta l_Idiovol;
run;


*----------------------winsorizing outliers-----------------------------------------------------;
%macro outliers();

%let vars=  l_Size Mtbq l_Past6_ret Profitability3 Investment3 l_Beta l_Idiovol;

%let cvar=%scan(&vars,1);
%let pos=1;

%do %while (&cvar ne );

%put &cvar;

proc sort data=asie.pooled_reg1; by year month; run;

proc means data=asie.pooled_reg1 min p1 p99 max median mean n noprint;
by year month;
var &cvar;
output out = asie.outliers 
p1 (&cvar) = p1_&cvar
p99 (&cvar) = p99_&cvar;
run;

proc sql;
create table asie.temp as select a.*, b.p1_&cvar, b.p99_&cvar
from asie.pooled_reg1 as a left join asie.outliers as b
on a.year=b.year and a.month=b.month;
quit;

data asie.pooled_reg1; set asie.temp;
if &cvar<p1_&cvar and &cvar ne . and p1_&cvar ne . then &cvar=p1_&cvar;
if &cvar>p99_&cvar and &cvar ne . and p99_&cvar ne . then &cvar=p99_&cvar;
drop p1_&cvar p99_&cvar;
run;

%let pos=%eval(&pos+1);
%let cvar=%scan(&vars,&pos);

%end;

%mend outliers;


%outliers;

proc sort data=asie.pooled_reg1; by permno year month; run;

data asie.pooled_reg2; set asie.pooled_reg1; run;


/** add the GIC **/

proc sql;
  create table asie.pooled_reg3 as
  select a.*,b.*
    from asie.pooled_reg2 as a left join asie.SEC_page_rank_gic as b
    on a.mentionedticker=b.ticker 
    and a.year=b.fyear
	and a.month=b.fmonth;
 run;


/** Adjust to industry means by GIC classifications **/

data asie.pooled_reg4;
set asie.pooled_reg3;
if GIC ne .;
if lsize ne .;
if lmtb ne .;
run;

proc sort data=asie.pooled_reg4;
by date GIC lsize;
run;

proc rank groups=3 data=asie.pooled_reg4 out=asie.pooled_reg5;
by date GIC;
var lsize; 
ranks size_t;
run;

proc sort data=asie.pooled_reg5;
by date GIC lmtb;
run;

proc rank groups=3 data=asie.pooled_reg5 out=asie.pooled_reg6;
by date GIC;
var lmtb; 
ranks mtb_t;
run;

proc sort data=asie.pooled_reg6;
by date GIC size_t mtb_t;
run;

proc means data=asie.pooled_reg6 noprint;
by date GIC size_t mtb_t;
var SALEGrowth_1_0 SALEGrowth_2_0 SALEGrowth_3_0
    OIBDPGrowth_1_0 OIBDPGrowth_2_0 OIBDPGrowth_3_0
    
    SALE_lATGrowth_1_0 SALE_lATGrowth_2_0 SALE_lATGrowth_3_0
    OIBDP_lATGrowth_1_0 OIBDP_lATGrowth_2_0 OIBDP_lATGrowth_3_0;
output out=asie.pooled_reg6_ind median=ind_SALEGrowth_1_0 ind_SALEGrowth_2_0 ind_SALEGrowth_3_0
    								   ind_OIBDPGrowth_1_0 ind_OIBDPGrowth_2_0 ind_OIBDPGrowth_3_0
                                      
									   ind_SALE_lATGrowth_1_0 ind_SALE_lATGrowth_2_0 ind_SALE_lATGrowth_3_0
    								   ind_OIBDP_lATGrowth_1_0 ind_OIBDP_lATGrowth_2_0 ind_OIBDP_lATGrowth_3_0;
run;

proc sql;
  create table asie.pooled_reg7 as
  select a.*,b.*
    from asie.pooled_reg6 as a left join asie.pooled_reg6_ind as b
    on a.date=b.date
    and a.GIC=b.GIC
    and a.size_t=b.size_t
    and a.mtb_t=b.mtb_t;
 run;

data asie.pooled_reg8;
set asie.pooled_reg7;

i_SALEGrowth_1_0=SALEGrowth_1_0-ind_SALEGrowth_1_0;
i_SALEGrowth_2_0=SALEGrowth_2_0-ind_SALEGrowth_2_0;
i_SALEGrowth_3_0=SALEGrowth_3_0-ind_SALEGrowth_3_0;
i_OIBDPGrowth_1_0=OIBDPGrowth_1_0-ind_OIBDPGrowth_1_0;
i_OIBDPGrowth_2_0=OIBDPGrowth_2_0-ind_OIBDPGrowth_2_0;
i_OIBDPGrowth_3_0=OIBDPGrowth_3_0-ind_OIBDPGrowth_3_0;

i_SALE_lATGrowth_1_0=SALE_lATGrowth_1_0-ind_SALE_lATGrowth_1_0;
i_SALE_lATGrowth_2_0=SALE_lATGrowth_2_0-ind_SALE_lATGrowth_2_0;
i_SALE_lATGrowth_3_0=SALE_lATGrowth_3_0-ind_SALE_lATGrowth_3_0;
i_OIBDP_lATGrowth_1_0=OIBDP_lATGrowth_1_0-ind_OIBDP_lATGrowth_1_0;
i_OIBDP_lATGrowth_2_0=OIBDP_lATGrowth_2_0-ind_OIBDP_lATGrowth_2_0;
i_OIBDP_lATGrowth_3_0=OIBDP_lATGrowth_3_0-ind_OIBDP_lATGrowth_3_0;

run;

proc univariate data=asie.pooled_reg8;
var 
    SALEGrowth_1_0 SALEGrowth_2_0 SALEGrowth_3_0
    OIBDPGrowth_1_0 OIBDPGrowth_2_0 OIBDPGrowth_3_0
   
    SALE_lATGrowth_1_0 SALE_lATGrowth_2_0 SALE_lATGrowth_3_0
    OIBDP_lATGrowth_1_0 OIBDP_lATGrowth_2_0 OIBDP_lATGrowth_3_0
   
   	i_SALEGrowth_1_0 i_SALEGrowth_2_0 i_SALEGrowth_3_0
    i_OIBDPGrowth_1_0 i_OIBDPGrowth_2_0 i_OIBDPGrowth_3_0
   
	i_SALE_lATGrowth_1_0 i_SALE_lATGrowth_2_0 i_SALE_lATGrowth_3_0
    i_OIBDP_lATGrowth_1_0 i_OIBDP_lATGrowth_2_0 i_OIBDP_lATGrowth_3_0;
run;


/****************************/
/** Fundamental means 
/****************************/

data asie.pooled_reg8a;
set asie.pooled_reg8;
if l_size ne .;
run;

proc sort data=asie.pooled_reg8a;
by mentionedticker year month;
run;

data asie.pooled_reg8b;
set asie.pooled_reg8a;
if l3simple_count > 0 and l3simple_mean_size>l_size then do;
targeting=1;
end;
else do;
targeting=0;
end;
if l3simple_count > 0 and l3simple_mean_size<l_size then do;
admiring=1;
end;
else do;
admiring=0;
end;
run;

proc sort data=asie.pooled_reg8b;
by mentionedticker year month;
run;

data asie.pooled_reg8c;
set asie.pooled_reg8b;
lt=lag12(mentionedticker);
lag_targeting=lag12(targeting);
lag_admiring=lag12(admiring);
if lt=mentionedticker then do;
ltargeting=lag_targeting;
ladmiring=lag_admiring;
end;
run;

data asie.pooled_reg9;
set asie.pooled_reg8c;
new_targeting=targeting-ltargeting;
new_admiring=admiring-ladmiring;
run;

*----------------------winsorizing outliers-----------------------------------------------------;
%macro outliers();

%let vars= SALEGrowth_1_0 SALEGrowth_2_0 SALEGrowth_3_0
           OIBDPGrowth_1_0 OIBDPGrowth_2_0 OIBDPGrowth_3_0
           
           SALE_lATGrowth_1_0 SALE_lATGrowth_2_0 SALE_lATGrowth_3_0
           OIBDP_lATGrowth_1_0 OIBDP_lATGrowth_2_0 OIBDP_lATGrowth_3_0
           
           i_SALEGrowth_1_0 i_SALEGrowth_2_0 i_SALEGrowth_3_0
           i_OIBDPGrowth_1_0 i_OIBDPGrowth_2_0 i_OIBDPGrowth_3_0
           
	       i_SALE_lATGrowth_1_0 i_SALE_lATGrowth_2_0 i_SALE_lATGrowth_3_0
           i_OIBDP_lATGrowth_1_0 i_OIBDP_lATGrowth_2_0 i_OIBDP_lATGrowth_3_0;

%let cvar=%scan(&vars,1);
%let pos=1;

%do %while (&cvar ne );

%put &cvar;

proc sort data=asie.pooled_reg9; by year month; run;

proc means data=asie.pooled_reg9 min p1 p99 max median mean n noprint;
by year month;
var &cvar;
output out = asie.outliers 
p1 (&cvar) = p1_&cvar
p99 (&cvar) = p99_&cvar;
run;

proc sql;
create table asie.temp as select a.*, b.p1_&cvar, b.p99_&cvar
from asie.pooled_reg9 as a left join asie.outliers as b
on a.year=b.year and a.month=b.month;
quit;

data asie.pooled_reg9; set asie.temp;
if &cvar<p1_&cvar and &cvar ne . and p1_&cvar ne . then &cvar=p1_&cvar;
if &cvar>p99_&cvar and &cvar ne . and p99_&cvar ne . then &cvar=p99_&cvar;
drop p1_&cvar p99_&cvar;
run;

%let pos=%eval(&pos+1);
%let cvar=%scan(&vars,&pos);

%end;

%mend outliers;

%outliers;

proc sort data=asie.pooled_reg9; by permno year month; run;

data asie.pooled_reg10; set asie.pooled_reg9; run;


proc univariate data=asie.pooled_reg10;
var i_SALEGrowth_1_0 i_SALEGrowth_2_0 i_SALEGrowth_3_0
    i_OIBDPGrowth_1_0 i_OIBDPGrowth_2_0 i_OIBDPGrowth_3_0
    
	i_SALE_lATGrowth_1_0 i_SALE_lATGrowth_2_0 i_SALE_lATGrowth_3_0
    i_OIBDP_lATGrowth_1_0 i_OIBDP_lATGrowth_2_0 i_OIBDP_lATGrowth_3_0;
run;

/** full sample **/
data asie.pooled_reg11;
set asie.pooled_reg10;
run;

/** targeting **/
data asie.pooled_reg11;
set asie.pooled_reg10; 
if l3simple_count > 0 and l3simple_mean_size>lsize;
run;

/** admiring **/
data asie.pooled_reg11;
set asie.pooled_reg10; 
if l3simple_count > 0 and l3simple_mean_size<lsize;
run;

proc means data=asie.pooled_reg11 noprint;
var i_SALEGrowth_1_0 i_SALEGrowth_2_0 i_SALEGrowth_3_0
    i_OIBDPGrowth_1_0 i_OIBDPGrowth_2_0 i_OIBDPGrowth_3_0
   
	i_SALE_lATGrowth_1_0 i_SALE_lATGrowth_2_0 i_SALE_lATGrowth_3_0
    i_OIBDP_lATGrowth_1_0 i_OIBDP_lATGrowth_2_0 i_OIBDP_lATGrowth_3_0;
output out=asie.pooled_reg11_mean mean=i_SALEGrowth_1_0 i_SALEGrowth_2_0 i_SALEGrowth_3_0
   									   i_OIBDPGrowth_1_0 i_OIBDPGrowth_2_0 i_OIBDPGrowth_3_0
   									   
									   i_SALE_lATGrowth_1_0 i_SALE_lATGrowth_2_0 i_SALE_lATGrowth_3_0
   									   i_OIBDP_lATGrowth_1_0 i_OIBDP_lATGrowth_2_0 i_OIBDP_lATGrowth_3_0;
proc print;
run;

ods excel file='/sas files/myfile.xlsx';
proc print data=asie.pooled_reg11_mean;
run;
ods excel close;


/** t-tests **/

data asie.targeting_sample;
set asie.pooled_reg11;
trgt=1;
run;

data asie.admiring_sample;
set asie.pooled_reg11;
trgt=0;
run;

data asie.targeting_admiring_sample;
set asie.targeting_sample asie.admiring_sample;
run;

proc ttest data=asie.targeting_admiring_sample;
class trgt;
var i_SALEGrowth_1_0 i_SALEGrowth_2_0 i_SALEGrowth_3_0
    i_OIBDPGrowth_1_0 i_OIBDPGrowth_2_0 i_OIBDPGrowth_3_0;
run;

/** Correct for autocorrelaltio using Newey-West SEs **/

proc sort data=asie.targeting_admiring_sample;
by mentionedticker year month;
run;

proc model data=asie.targeting_admiring_sample; 
parms b0 b1; 
i_SALEGrowth_1_0 = b0+b1*trgt; 
fit i_SALEGrowth_1_0 /gmm kernel = (bart,13,0) vardef=n; 
instruments trgt; 
test trgt;
run;


/***************************************************/
/** TABLE 8 / FIGURE 6
/***************************************************/

/************************************/
/** merger target prediction **/
/************************************/

data asie.simple_count7a;
set asie.simple_count7_all; /** change simple_count7_all to simple_count7_inter and simple_count7_in **/
keep mentionedticker year month l3simple_count l3simple_mean_size l3simple_max_size l3simple_min_size l3simple_sum_size l3simple_size_count;
run;

/** merge with the firm characteristics **/

proc sql;
  create table asie.simple_count7b as
  select a.*,b.*
    from asie.firm_characteristics as a left join asie.simple_count7a as b
    on a.tic=b.mentionedticker
    and a.year=b.year 
    and a.month=b.month;
 run;

data asie.simple_count7c;
set asie.simple_count7b;
if mentionedticker = '' then l3simple_count=0;
if mentionedticker = '' then l3simple_mean_size=.;
if mentionedticker = '' then l3simple_sum_size=.;
if mentionedticker = '' then l3simple_size_count=.;
run;

data asie.simple_count7d;
set asie.simple_count7c;
if l_size ne .;
run;

data asie.simple_count7e;
set asie.simple_count7d;
if l3simple_max_size>0 and l3simple_max_size>l_size then do; 
admire_l=0;
end;
else do;
if l3simple_max_size>0 and l3simple_max_size<l_size then do; 
admire_l=1;
end;
else do;
admire_l=.;
end;
end;
if tic ne '';
keep tic year month l_size l3simple_max_size l3simple_min_size l3simple_mean_size admire_l l3simple_count;
run;

data asie.simple_count7f;
set asie.simple_count7e;
if l3simple_mean_size>0 and l3simple_mean_size>l_size then do; 
admire_a=0;
end;
else do;
if l3simple_mean_size>0 and l3simple_mean_size<l_size then do; 
admire_a=1;
end;
else do;
admire_a=.;
end;
end;
if tic ne '';
keep tic year month l_size l3simple_max_size l3simple_min_size l3simple_mean_size admire_l admire_a l3simple_count;
run;

data asie.simple_count7g;
set asie.simple_count7f;
if l3simple_min_size>0 and l3simple_min_size>l_size then do; 
admire_s=0;
end;
else do;
if l3simple_min_size>0 and l3simple_min_size<l_size then do; 
admire_s=1;
end;
else do;
admire_s=.;
end;
end;
if tic ne '';
keep tic year month l_size l3simple_max_size l3simple_min_size l3simple_mean_size admire_l admire_a admire_s l3simple_count;
run;

proc sql;
  create table asie.qf7a as
  select a.*,b.*
    from asie.qf6_full_d as a left join asie.simple_count7g as b /** 'qf6_full_d' is from TABLE 3 code, change to qf6_full_d_inter and qf6_full_d_in **/
    on a.ticker=b.tic
    and a.year=b.year
    and a.month=b.month;
 run;

proc sort data=asie.qf7a nodupkey;
by ticker year month;
run;

/** All **/

data asie.all_crank;
set asie.qf7a;
run;

/** add the merger target data **/

data asie.permno_cusip_gvkey_tic_sic1;
set asie.permno_cusip_gvkey_tic_sic; 
cnum=substr(cusip,1,6);
year=year(date);
month=month(date);
run;

data asie.target_firms1_OneObs1a;
set asie.target_firms1_OneObs1; /** target firms file from Thomson One Banker **/ 
merger_ann=1;
year=year(Date_ann);
month=month(Date_ann);
run;

proc sql;
  create table asie.target_firms1_OneObs2 as
  select a.*,b.*
    from asie.target_firms1_OneObs1a as a left join asie.permno_cusip_gvkey_tic_sic1 as b
    on a.target_cusip=b.cnum
    and a.year=b.year
    and a.month=b.month;
 run;

proc sort data=asie.target_firms1_OneObs2 nodupkey;
by target_cusip year month;
run;

proc sql;
  create table asie.all_crank2 as
  select a.*,b.*
    from asie.all_crank as a left join asie.target_firms1_OneObs2 as b
    on a.ticker=b.tic
    and a.year=b.year
    and a.month=b.month;
 run;

data asie.all_crank3;
set asie.all_crank2;
if merger_ann=. then merger_ann=0;
run;

proc sort data=asie.all_crank3;
by descending ticker descending year descending month;
run;

data asie.all_crank4;
set asie.all_crank3;
lt12=lag12(ticker);
lma1=lag1(merger_ann);
lma2=lag2(merger_ann);
lma3=lag3(merger_ann);
lma4=lag4(merger_ann);
lma5=lag5(merger_ann);
lma6=lag6(merger_ann);
lma7=lag7(merger_ann);
lma8=lag8(merger_ann);
lma9=lag9(merger_ann);
lma10=lag10(merger_ann);
lma11=lag11(merger_ann);
lma12=lag12(merger_ann);
run;

data asie.all_crank5;
set asie.all_crank4;
lt1=lag1(ticker);
lt2=lag2(ticker);
lt3=lag3(ticker);
lt4=lag4(ticker);
lt5=lag5(ticker);
lt6=lag6(ticker);
lt7=lag7(ticker);
lt8=lag8(ticker);
lt9=lag9(ticker);
lt10=lag10(ticker);
lt11=lag11(ticker);
lt12=lag12(ticker);
if lt1 ne ticker then lma1=0;
if lt2 ne ticker then lma2=0;
if lt3 ne ticker then lma3=0;
if lt4 ne ticker then lma4=0;
if lt5 ne ticker then lma5=0;
if lt6 ne ticker then lma6=0;
if lt7 ne ticker then lma7=0;
if lt8 ne ticker then lma8=0;
if lt9 ne ticker then lma9=0;
if lt10 ne ticker then lma10=0;
if lt11 ne ticker then lma11=0;
if lt12 ne ticker then lma12=0;
run;

proc sort data=asie.all_crank5;
by ticker year month;
run;

data asie.all_crank6;
set asie.all_crank5;
merger_year=merger_ann+lma1+lma2+lma3+lma4+lma5+lma6+lma7+lma8+lma9+lma10+lma11+lma12;
if merger_year = 1 then do;
mrg_year=1; 
end;
else do;
mrg_year=0;
end;
run;

proc sort data=asie.all_crank6;
by ticker year month;
run;

/** add the merger probabalities **/

data asie.all_crank7;
set asie.all_crank6;
drop date;
run; 

data asie.all_crank8;
set asie.all_crank7;
day=1;
date=MDY(Month, day, year);
format Date monyy7.;
run; 

proc sql;
  create table asie.takeoverprobabilities1 as
  select a.*,b.*
    from asie.takeoverprobabilities as a left join asie.permno_cusip_gvkey_tic_sic1 as b /** takeover probabilities are based on the model of Billett and Xue (2007) **/
    on a.permno=b.permno
    and a.year=b.year
    and a.month=b.month;
 run;

proc sql;
create table asie.all_crank9 as select * /** this is the main file 'asie.all_crank9' **/
from asie.all_crank8 as a left join asie.takeoverprobabilities1 as b
    on a.ticker = b.tic 
    and	intck('month',a.date,b.datadate) between -17 and -6;
quit;


/** regression of future marger targets on mention count when controling for merger probability **/

data asie.all_crank10;
set asie.all_crank9;
keep permno ticker date year month to mrg_year gr_decile admire_l admire_a admire_s l3simple_count;
run;

/** targeted vs admired mentioned firms **/

data asie.Targeted_admired_high_crank;
set asie.all_crank10;
if l3simple_count ge 1;
target_l=1-admire_l;
target_a=1-admire_a;
target_s=1-admire_s;
run;

proc logistic data=asie.Targeted_admired_high_crank descending;
class ticker date/param=ref;
   model mrg_year = to;
run;

proc logistic data=asie.Targeted_admired_high_crank descending;
class ticker date/param=ref;
   model mrg_year = target_a to;
run;


/***************************************************/
/** TABLE 9 
/***************************************************/

/*************************/
/** mention beta **/
/*************************/

/** get the excess return of the hedge portfolio as the mention factor from the codes of TABLE 3 and TABLE 4 **/

data asie.mention_factor1;
set asie.qf11;
mention_factor=p_ret;
keep year month mention_factor emkt smb hml rmw cma mom;
proc print;
run;

proc univariate data=asie.mention_factor1;
var mention_factor;
run;

/** merge the factor to firm characteristics **/

data asie.firm_characteristics_a;
set asie.firm_characteristics;
keep year month tic ex_ret l_Size Mtbq l_Past6_ret Profitability3 Investment3 l_Beta l_Idiovol;
run;

proc sql;
  create table asie.main_file3_b as
  select a.*,b.*
    from asie.firm_characteristics_a as a, asie.mention_factor1 as b
    where a.year=b.year
    and a.month=b.month;
 run;

/** calculate rolling mention beta **/

%let wrds = wrds.wharton.upenn.edu 4016; 
options comamid=TCP remote=WRDS;
signon username=_prompt_;

rsubmit;
proc upload data=asie.main_file3_b out=M1;
run;

/** Indicator for lag36 data **/
rsubmit;
proc sort data=M1;
by tic year month;
run;

data M2;
set M1;
lag36tic=lag36(tic);
if tic=lag36tic then LL=1;
run; 

/** Define order variable to merge the regressions results with the main file **/

data M3;
set M2;
obs=_n_;
run;

/** Rolling regressions of 36 months **/

data rolling36; 
   set M3 (rename=(obs=rollid));
   if _n_+35 lt nobs then do point = _n_ to _n_+35; 
   set M3 point=point nobs=nobs; 
   output; 
   end; 
   run;

proc reg noprint outest=est; 
   by rollid; 
   model ex_ret = mention_factor emkt smb hml rmw cma mom; 
   run; 

data beta1;
set est;
obs1=rollid+36;
mention_beta=mention_factor;
keep obs1 mention_beta;
run; 

proc sql;
  create table M4 as
  select a.*,b.*
    from M3 as a left join beta1 as b
    on a.obs=b.obs1;
 run;

data M5;
set M4;
mention_beta1=ll*mention_beta;
run;

data M6;
set M5;
if mention_beta1 ge 10 then mention_beta1=. ;
if mention_beta1 le -10 then mention_beta1=.;
run;

data mention_beta;
set M6;
if mention_beta1 ne .;
keep tic year month mention_beta1 ex_ret l_Size Mtbq l_Past6_ret Profitability3 Investment3 l_Beta l_Idiovol;
run;

proc download data=mention_beta out=asie.mention_beta36_all_mentions; /** change to mention_beta36_targeting **/
run;


/*******************************************/
/** portfolio sort on mention_beta **/
/*******************************************/

proc sort data=asie.mention_beta36_all_mentions; /** change to mention_beta36_targeting **/
by tic year month;
run;

data asie.mention_beta2;
set asie.mention_beta36_all_mentions; /** change to mention_beta36_targeting **/
lt=lag(tic);
lagmention_beta1=lag(mention_beta1);
if lt=tic then do;
lmention_beta1=lagmention_beta1;
end;
if lmention_beta1 ne .;
run;

proc sort data=asie.mention_beta2;
by year month lmention_beta1;
run;

proc rank groups=5 data=asie.mention_beta2 out=asie.qf6;
var lmention_beta1; 
ranks gr_decile;
by year month;
run;

/** Calculating average excess returns **/

proc sort data=asie.qf6;
by year month gr_decile;
run;

proc means data=asie.qf6 noprint;
by year month gr_decile;
var ex_ret /weight = l_size; 
output out=asie.qf7 mean=mean_exret;
proc print;
run;

/** Adding the factors **/

data asie.qf8;
set asie.qf7;
if _freq_ ge 5;
run;

proc sql;
  create table asie.qf9 as
  select a.*,b.*
    from asie.qf8 as a, asie.factorsmonthly2017 as b
    where a.year=b.year
    and a.month=b.month;
 run;

data asie.qf10;
set asie.qf9;
if year ge 1994;
run;

/** Mean returns of the 5 portolios **/

proc sort data=asie.qf10; 
by gr_decile year month;
run;

proc reg data=asie.qf10;
by gr_decile;
model mean_exret=;
run;

/**proc reg data=asie.qf10;
by gr_decile;
model mean_exret=emkt;
run;

proc reg data=asie.qf10;
by gr_decile;
model mean_exret=emkt smb hml;
run;

proc reg data=asie.qf10;
by gr_decile;
model mean_exret=emkt smb hml mom;
run;

proc reg data=asie.qf10;
by gr_decile;
model mean_exret=emkt smb hml rmw cma;
run;**/

proc reg data=asie.qf10;
by gr_decile;
model mean_exret=emkt smb hml rmw cma mom;
run;


/** Mean return for the difference **/

proc sort data=asie.qf10;
by year month gr_decile;
run;

data asie.qf11; 
set asie.qf10;
lag4_grd=lag4(gr_decile);
lag4_meanf=lag4(mean_exret);
lag4year=lag4(year);
if gr_decile=4 and lag4_grd=0 and lag4year=year then
p_ret=mean_exret-lag4_meanf;
if gr_decile=4; 
run;

proc sort data=asie.qf11;
by year month;
run;

proc reg data=asie.qf11;
model p_ret=;
run;

/**proc reg data=asie.qf11;
model p_ret=emkt;
run;

proc reg data=asie.qf11;
model p_ret=emkt smb hml;
run;

proc reg data=asie.qf11;
model p_ret=emkt smb hml mom;
run;

proc reg data=asie.qf11;
model p_ret=emkt smb hml rmw cma;
run;**/

proc reg data=asie.qf11;
model p_ret=emkt smb hml rmw cma mom;
run;


/***************************************************/
/** TABLE 10 
/***************************************************/

data asie.simple_count7a;
set asie.simple_count7_all; 
keep mentionedticker year month l3simple_count l3simple_mean_size l3simple_sum_size l3simple_size_count;
run;

/** merge with the firm characteristics **/

proc sql;
  create table asie.simple_count7b as
  select a.*,b.*
    from asie.firm_characteristics as a left join asie.simple_count7a as b
    on a.tic=b.mentionedticker
    and a.year=b.year 
    and a.month=b.month;
 run;

data asie.simple_count7c;
set asie.simple_count7b;
if mentionedticker = '' then l3simple_count=0;
if mentionedticker = '' then l3simple_mean_size=.;
if mentionedticker = '' then l3simple_sum_size=.;
if mentionedticker = '' then l3simple_size_count=.;
run;

/** remove microcap cap stocks **/

proc univariate data=asie.firm_characteristics;
var l3simple_count l3simple_mean_size l3simple_sum_size;
run;

proc sql;
  create table asie.simple_count7f as
  select a.*,b.*
    from asie.firm_characteristics as a left join asie.size_breakpoints_new as b 
    on a.year=b.year 
    and a.month=b.month;
 run;

data asie.simple_count7g;
set asie.simple_count7f;
if l_size ne .;
l_size_x=l_size;
drop gr_decile;
run;

proc sort data=asie.simple_count7g;
by year month l_size;
run;

data asie.simple_count7h;
set asie.simple_count7g;
if l_size/1000 le p10 then do;
r_var=0;
end;
else do;
if l_size/1000 le p20 then do;
r_var=1;
end;
else do;
if l_size/1000 le p30 then do;
r_var=2;
end;
else do;
if l_size/1000 le p40 then do;
r_var=3;
end;
else do;
if l_size/1000 le p50 then do;
r_var=4;
end;
else do;
if l_size/1000 le p60 then do;
r_var=5;
end;
else do;
if l_size/1000 le p70 then do;
r_var=6;
end;
else do;
if l_size/1000 le p80 then do;
r_var=7;
end;
else do;
if l_size/1000 le p90 then do;
r_var=8;
end;
else do;
r_var=9;
end;
end;
end;
end;
end;
end;
end;
end;
end;
run;

proc freq data=asie.simple_count7h;
tables r_var;
run;

data asie.simple_count7i;
set asie.simple_count7h;
if abs(prc) ge 5;
if r_var ge 2;
if mentionedticker = '' then l3simple_count=0;
if mentionedticker = '' then l3simple_mean_size=0;
if mentionedticker = '' then l3simple_sum_size=0;
if mentionedticker = '' then l3simple_max_size=0;
if mentionedticker = '' then l3simple_size_count=0;
run;

/** add GIC **/

proc sql;
  create table asie.simple_count7i_2 as
  select a.*,b.*
    from asie.simple_count7i as a left join asie.firm_gic_2 as b
    on a.mentionedticker=b.ticker;
 run;

/** add GIC OL rank **/

proc sql;
  create table asie.simple_count7i_3 as
  select a.*,b.*
    from asie.simple_count7i_2 as a left join asie.GIC_OL1 as b
    on a.GIC=b.GIC
    and a.year=b.year
    and a.month=b.month;
 run;

/** add GIC HH rank **/

proc sql;
  create table asie.simple_count7i_4 as
  select a.*,b.*
    from asie.simple_count7i_3 as a left join asie.GIC_HH as b
    on a.GIC=b.GIC
    and a.year=b.year
    and a.month=b.month;
 run;

/** add ff48 **/

data asie.ff48_2;
set asie.operating_leverage2;
if ff48 ne .;
keep tic year month ff48;
run;

proc sort data=asie.ff48_2 nodupkey;
by tic;
run;

proc sql;
  create table asie.simple_count7i_5 as
  select a.*,b.*
    from asie.simple_count7i_4 as a left join asie.ff48_2 as b
    on a.mentionedticker=b.tic;
 run;

/** add ff48 OL rank **/

proc sql;
  create table asie.simple_count7i_6 as
  select a.*,b.*
    from asie.simple_count7i_5 as a left join asie.ff48_OL1 as b
    on a.ff48=b.ff48
    and a.year=b.year
    and a.month=b.month;
 run;

/** add ff48 HH rank **/

proc sql;
  create table asie.simple_count7i_7 as
  select a.*,b.*
    from asie.simple_count7i_6 as a left join asie.ff48_HH as b
    on a.ff48=b.ff48
    and a.year=b.year
    and a.month=b.month;
 run;

proc univariate data=asie.simple_count7i_7; 
var ex_ret l3simple_count l_Size Mtbq l_Past6_ret Profitability3 Investment3 l_Beta l_Idiovol
    ind_GIC_OL1 ind_ff48_OL1 HH_GIC_SALE HH_ff48_SALE;
run;


*----------------------winsorizing outliers-----------------------------------------------------;
%macro outliers();

%let vars=  l_Size Mtbq l_Past6_ret Profitability3 Investment3 l_Beta l_Idiovol;

%let cvar=%scan(&vars,1);
%let pos=1;

%do %while (&cvar ne );

%put &cvar;

proc sort data=asie.simple_count7i_7; by year month; run;

proc means data=asie.simple_count7i_7 min p1 p99 max median mean n noprint;
by year month;
var &cvar;
output out = asie.outliers 
p1 (&cvar) = p1_&cvar
p99 (&cvar) = p99_&cvar;
run;

proc sql;
create table asie.temp as select a.*, b.p1_&cvar, b.p99_&cvar
from asie.simple_count7i_7 as a left join asie.outliers as b
on a.year=b.year and a.month=b.month;
quit;

data asie.simple_count7i_7; set asie.temp;
if &cvar<p1_&cvar and &cvar ne . and p1_&cvar ne . then &cvar=p1_&cvar;
if &cvar>p99_&cvar and &cvar ne . and p99_&cvar ne . then &cvar=p99_&cvar;
drop p1_&cvar p99_&cvar;
run;

%let pos=%eval(&pos+1);
%let cvar=%scan(&vars,&pos);

%end;

%mend outliers;


%outliers;

proc sort data=asie.simple_count7i_7; by permno year month; run;

data asie.simple_count7j; set asie.simple_count7i_7; run;

/** full sample **/
data asie.simple_count7ja;
set asie.simple_count7j;
/**if r_var le 4;
if r_var ge 5;**/ 
run;

/** targeting **/
data asie.simple_count7ja;
set asie.simple_count7j;
if l_size ne .;
if l3simple_mean_size = . then l3simple_count = 0; 
if l3simple_mean_size<l_size then l3simple_count = 0;
/**if r_var le 4;
if r_var ge 5;**/  
run;

/** admiring **/
data asie.simple_count7ja;
set asie.simple_count7j; 
if l_size ne .;
if l3simple_mean_size = . then l3simple_count = 0;  
if l3simple_mean_size>l_size then l3simple_count = 0;
/**if r_var le 4;
if r_var ge 5;**/  
run;


data asie.simple_count7jb;
set asie.simple_count7ja; 
/**if l3simple_count > 0;**/ /** this to exclude firms that are not mentioned at all **/
ex_ret100=ex_ret*100;
log_size=log(l_Size);
if Mtbq le 0 and Mtbq ne . then Mtbq=0.01;
if Mtbq ge 30 and Mtbq ne . then Mtbq=30;
log_Mtbq=log(Mtbq);
if l3simple_count ne .;
if l3simple_count = 0 then do;
no_mention=1;
end;
else do;
no_mention=0;
end;
run;

data asie.simple_count7jc;
set asie.simple_count7jb;
l_sc=log(1+l3simple_count); 
sc_no_mention=l_sc*no_mention;
size_no_mention=log_size*no_mention;
sc_GIC_OL=l_sc*ind_GIC_OL1;
sc_ff48_OL=l_sc*ind_ff48_OL1;
sc_GIC_HH=l_sc*HH_GIC_SALE;
sc_ff48_HH=l_sc*HH_ff48_SALE;  
run;

proc sort data=asie.simple_count7jc;
by year month;
run;

ods listing close;
ods output parameterestimates=pe fitstatistics=adjrsq;
proc reg data=asie.simple_count7jc;
 by year month;
 model ex_ret100=l_sc ind_GIC_OL1 sc_GIC_OL HH_GIC_SALE sc_GIC_HH no_mention size_no_mention/**target sc_target**/ log_size log_Mtbq l_Past6_ret Profitability3 Investment3 l_Beta l_Idiovol;
run;
quit;
ods listing;

proc means data=pe mean std t probt;
 var estimate; class variable;
run;

/** Newey-West correction **/

proc sort data=pe; by variable; run;

%let lags=12;
ods output parameterestimates=nw;
ods listing close;
proc model data=pe;
 by variable;
 instruments / intonly;
 estimate=a;
 fit estimate / gmm kernel=(bart,%eval(&lags+1),0); run;
quit;
ods listing;

proc print data=nw; id variable;
 var estimate--df; format estimate stderr 7.4;
run;

/** number of observations **/

data asie.obs1;
set asie.simple_count7jc;
if ex_ret100 ne .;
if l_sc ne .;
if ind_GIC_OL1 ne . and HH_GIC_SALE ne .;
if no_mention ne .;
if size_no_mention ne .; 
if log_size ne .;
if log_Mtbq ne .;
if l_Past6_ret ne .;
if Profitability3 ne .;
if Investment3 ne .;
if l_Beta ne .;
if l_Idiovol;
run;

/** mean r-sqaure **/

data asie.rsq;
set adjrsq;
if label2='R-Square';
proc print;
run;

proc means data=asie.rsq noprint;
var nValue2;
output out=asie.rsq_mean mean=rsq_mean;
proc print;
run;


/***************************************************/
/** TABLE 11 
/***************************************************/

/** For full sample, in the codes of TABLES 3 and 4, set all gr_decile=0, and if y=2 **/

/** get the simple mention count files from the codes 'Code_SimpleCount_Base' and 'Code_SimpleCount_hp' **/

data asie.simple_count7a;
set asie.simple_count7_all; /** change simple_count7_all to simple_count7_inter and simple_count7_in 
                                                    and simple_count7_inter_hp and simple_count7_in_hp **/
keep mentionedticker year month l3simple_count l3simple_mean_size l3simple_sum_size l3simple_max_size l3simple_min_size l3simple_size_count;
run;

proc sort data=asie.simple_count7a;
by year month;
run;

proc means data=asie.simple_count7a noprint;
by year month;
var l3simple_sum_size; 
output out=asie.simple_count7a_median mean=max_size_median;
proc print data=asie.simple_count7a_median;
run;

/** merge with the firm characteristics **/

proc sql;
  create table asie.simple_count7b as
  select a.*,b.*
    from asie.firm_characteristics as a left join asie.simple_count7a as b
    on a.tic=b.mentionedticker
    and a.year=b.year 
    and a.month=b.month;
 run;

data asie.simple_count7c;
set asie.simple_count7b;
if mentionedticker = '' then l3simple_count=0;
if mentionedticker = '' then l3simple_mean_size=0;
if mentionedticker = '' then l3simple_sum_size=0;
if mentionedticker = '' then l3simple_max_size=0;
if mentionedticker = '' then l3simple_size_count=0;
run;


/** remove microcap cap stocks **/

proc univariate data=asie.simple_count7c;
var l3simple_count l3simple_mean_size l3simple_sum_size;
run;

proc sql;
  create table asie.simple_count7f as
  select a.*,b.*
    from asie.simple_count7c as a left join asie.size_breakpoints_new as b 
    on a.year=b.year 
    and a.month=b.month;
 run;

data asie.simple_count7g;
set asie.simple_count7f;
if l_size ne .;
l_size_x=l_size;
run;

proc sort data=asie.simple_count7g;
by year month l_size;
run;

data asie.simple_count7h;
set asie.simple_count7g;
if l_size/1000 le p10 then do;
r_var=0;
end;
else do;
if l_size/1000 le p20 then do;
r_var=1;
end;
else do;
if l_size/1000 le p30 then do;
r_var=2;
end;
else do;
if l_size/1000 le p40 then do;
r_var=3;
end;
else do;
if l_size/1000 le p50 then do;
r_var=4;
end;
else do;
if l_size/1000 le p60 then do;
r_var=5;
end;
else do;
if l_size/1000 le p70 then do;
r_var=6;
end;
else do;
if l_size/1000 le p80 then do;
r_var=7;
end;
else do;
if l_size/1000 le p90 then do;
r_var=8;
end;
else do;
r_var=9;
end;
end;
end;
end;
end;
end;
end;
end;
end;
run;

proc freq data=asie.simple_count7h;
tables r_var;
run;

data asie.simple_count7i;
set asie.simple_count7h;
if abs(prc) ge 5;
if r_var ge 2;
run;

/*****************************************************************************/
/** portfolios sorts, from the code 'PortfolioSort_SizeDecileResid_in_3'
/*****************************************************************************/

/** Calculating average excess returns **/

data asie.qf7b;
set asie.simple_count7i;
if l3simple_count > 0;
if l3simple_mean_size>l_size; /** this is the admiring/targeting classification, and remove this line for all mentions **/
if l3simple_mean_size ne .;
if l_size ne .;
run;

proc sort data=asie.qf7b;
by year month;
run;

proc means data=asie.qf7b noprint;
by year month;
var ex_ret/weight=l_size; 
output out=asie.qf8 mean=mean_exret;
run;

/** Adding the factors **/

data asie.qf8a;
set asie.qf8;
if _freq_ ge 5;
run;

proc means data=asie.qf8a noprint; /** this is the average portfolio size **/
var _freq_; 
output out=asie.qf8a_port mean=firm_port;
proc print;
run;

proc sql;
  create table asie.qf9 as
  select a.*,b.*
    from asie.qf8a as a, asie.factorsmonthly2017 as b
    where a.year=b.year
    and a.month=b.month;
 run;

data asie.qf10;
set asie.qf9;
if year ge 1994;
run;

proc sort data=asie.qf10;
by year month;
run;

proc reg data=asie.qf10;
model mean_exret=emkt smb hml rmw cma mom;
run;
quit;


/** This is for only non-mentioned firms **/

data asie.qf7b; 
set asie.simple_count7i; 
if l3simple_count = 0 ;  
run;

proc sort data=asie.qf7b;
by year month;
run;

proc means data=asie.qf7b noprint;
by year month;
var ex_ret/weight=l_size; 
output out=asie.qf8 mean=mean_exret;
run;

/** Adding the factors **/

data asie.qf8a;
set asie.qf8;
if _freq_ ge 5;
run;

proc means data=asie.qf8a noprint; /** this is the average portfolio size **/
var _freq_; 
output out=asie.qf8a_port mean=firm_port;
proc print;
run;

proc sql;
  create table asie.qf9 as
  select a.*,b.*
    from asie.qf8a as a, asie.factorsmonthly2017 as b
    where a.year=b.year
    and a.month=b.month;
 run;

data asie.qf10;
set asie.qf9;
if year ge 1994;
run;

proc sort data=asie.qf10;
by year month;
run;

proc reg data=asie.qf10;
model mean_exret=emkt smb hml rmw cma mom;
run;
quit;


/***************************************************/
/** TABLE 12 
/***************************************************/

/** get the simple mention count mentioning file 'simple_count7_mentioning' from the code 'Code_SimpleCount_Base_Mentioning' **/

data asie.simple_count7a;
set asie.simple_count7_mentioning; 
keep mentionedticker year month l3simple_count l3simple_mean_size l3simple_sum_size l3simple_size_count;
run;

/** merge with the firm characteristics **/

data asie.main_file1;
set asie.firm_characteristics;
if year ge 1994;
keep tic year month ex_ret prc l_Size Mtbq l_Past6_ret Profitability3 Investment3 l_Beta l_Idiovol;
run;

proc sql;
  create table asie.main_file2 as
  select a.*,b.*
    from asie.simple_count7a as a left join asie.main_file1 as b
    on a.mentionedticker=b.tic 
    and a.year=b.year
	and a.month=b.month;
 run;

proc sort data=asie.main_file2 nodupkey;
by mentionedticker year month;
run;

data asie.simple_count7c;
set asie.main_file2;
if mentionedticker ne '';
run;

/** remove microcap cap stocsk **/

proc univariate data=asie.simple_count7c;
var l3simple_count l3simple_mean_size l3simple_sum_size;
run;

proc sql;
  create table asie.simple_count7f as
  select a.*,b.*
    from asie.simple_count7c as a left join asie.size_breakpoints_new as b 
    on a.year=b.year 
    and a.month=b.month;
 run;

data asie.simple_count7g;
set asie.simple_count7f;
if l_size ne .;
l_size_x=l_size;
drop gr_decile;
run;

proc sort data=asie.simple_count7g;
by year month l_size;
run;

data asie.simple_count7h;
set asie.simple_count7g;
if l_size/1000 le p10 then do;
r_var=0;
end;
else do;
if l_size/1000 le p20 then do;
r_var=1;
end;
else do;
if l_size/1000 le p30 then do;
r_var=2;
end;
else do;
if l_size/1000 le p40 then do;
r_var=3;
end;
else do;
if l_size/1000 le p50 then do;
r_var=4;
end;
else do;
if l_size/1000 le p60 then do;
r_var=5;
end;
else do;
if l_size/1000 le p70 then do;
r_var=6;
end;
else do;
if l_size/1000 le p80 then do;
r_var=7;
end;
else do;
if l_size/1000 le p90 then do;
r_var=8;
end;
else do;
r_var=9;
end;
end;
end;
end;
end;
end;
end;
end;
end;
run;

proc sort data=asie.simple_count7h;
by r_var year month;
run;

proc means data=asie.simple_count7h noprint;
by r_var year month;
var ex_ret/weight=l_size_x; 
output out=asie.simple_count7h_mean mean=size_exret;
proc print;
run;

proc sql;
  create table asie.simple_count7h_a as
  select a.*,b.*
    from asie.simple_count7h as a left join asie.simple_count7h_mean as b 
    on a.r_var=b.r_var
    and a.year=b.year 
    and a.month=b.month;
 run;

data asie.simple_count7h_b;
set asie.simple_count7h_a;
ex_ret_size=ex_ret-size_exret;
run;

proc freq data=asie.simple_count7h_b;
tables r_var;
run;

data asie.simple_count7i;
set asie.simple_count7h_b;
if abs(prc) ge 5;
if r_var ge 2;
run;


/***************************/
/** Single sort **/
/***************************/

proc univariate data=asie.simple_count7i;
var l3simple_count l3simple_mean_size l3simple_sum_size;
run;

proc freq data=asie.simple_count7i;
tables l3simple_count;
run;

data asie.simple_count7j;
set asie.simple_count7i;
alt_var=l3simple_count; 
if alt_var ne .;
run;

/** sorting mentions into 0,1,2,and 3+ groups **/

data asie.qf6_full_no_mention;
set asie.simple_count7j;
if l3simple_count=0;
gr_decile=-1;
run;

data asie.qf6_full_mention;
set asie.simple_count7j;
if l3simple_count>0;
if l3simple_mean_size>l_size; /** this is the admiring/targeting classification **/
if l3simple_mean_size ne .;
if l_size ne .;
run;

proc freq data=asie.qf6_full_mention;
tables l3simple_count;
run;

data asie.qf6_full_mention_a;
set asie.qf6_full_mention;
if l3simple_count=1 then do;
gr_decile=0;
end;
else do;
if l3simple_count=2 then do;
gr_decile=1;
end;
else do;
gr_decile=2;
end;
end;
run;

proc freq data=asie.qf6_full_mention_a;
tables gr_decile;
run;

/**proc sort data=asie.qf6_full_mention;
by year month alt_var;
run;

proc rank groups=3 data=asie.qf6_full_mention out=asie.qf6_full_mention_a;
var alt_var; 
ranks gr_decile;
by year month;
run;**/

data asie.qf6_full_d;
set asie.qf6_full_no_mention asie.qf6_full_mention_a;
run;

/** Calculating average excess returns **/

data asie.qf7a;
set asie.qf6_full_d;
/**if l3simple_count > 0;**/ /** use this condition for only mentioned firms **/
run;

proc sort data=asie.qf7a;
by year month gr_decile;
run;

proc means data=asie.qf7a noprint;
by year month gr_decile;
var ex_ret/weight=l_size_x; /** change ex_ret to ex_ret_size for size-decile adjusted return **/
output out=asie.qf8 mean=mean_exret;
run;

/** Adding the factors **/

data asie.qf8a;
set asie.qf8;
if _freq_ ge 5;
run;

proc sql;
  create table asie.qf9 as
  select a.*,b.*
    from asie.qf8a as a, asie.factorsmonthly2017 as b
    where a.year=b.year
    and a.month=b.month;
 run;

data asie.qf9a;
set asie.qf9;
if year ge 1994;
run;

proc sort data=asie.qf9a;
by year month gr_decile;
run;

data asie.qf9b;
set asie.qf9a;
by year month gr_decile;
if first.month then do x=0;
end;
x+1;
run;

proc sort data=asie.qf9b;
by descending year descending month descending x;
run;

data asie.qf9c;
set asie.qf9b;
by descending year descending month descending x;
if first.month then do y=x;
end;
y+0;
run;

proc sort data=asie.qf9c;
by year month gr_decile;
run;

data asie.qf10;
set asie.qf9c;
if y=4;
run;

proc sort data=asie.qf10; 
by gr_decile year month;
run;

proc means data=asie.qf10 noprint; /** this is the average portfolio size **/
by gr_decile;
var _freq_; 
output out=asie.qf10_port mean=firm_port;
proc print;
run;

proc sort data=asie.qf10; 
by gr_decile year month;
run;

proc reg data=asie.qf10;
by gr_decile;
model mean_exret=;
run;

/**proc reg data=asie.qf10;
by gr_decile;
model mean_exret=emkt;
run;

proc reg data=asie.qf10;
by gr_decile;
model mean_exret=emkt smb hml;
run;

proc reg data=asie.qf10;
by gr_decile;
model mean_exret=emkt smb hml mom;
run;

proc reg data=asie.qf10;
by gr_decile;
model mean_exret=emkt smb hml rmw cma;
run;**/

proc reg data=asie.qf10;
by gr_decile;
model mean_exret=emkt smb hml rmw cma mom;
run;


/** Mean return for the difference **/

proc sort data=asie.qf10;
by year month gr_decile;
run;

/** high mention vs no mention **/

data asie.qf11; /** this is for portfolio 5-0 **/
set asie.qf10;
lag3_grd=lag3(gr_decile);
lag3_meanf=lag3(mean_exret);
lag3year=lag3(year);
if gr_decile=2 and lag3_grd=-1 and lag3year=year then
p_ret=mean_exret-lag3_meanf;
if gr_decile=2; 
run;

/** high mention vs low mention **/

data asie.qf11; /** this is for portfolio 5-0 **/
set asie.qf10;
lag2_grd=lag2(gr_decile);
lag2_meanf=lag2(mean_exret);
lag2year=lag2(year);
if gr_decile=2 and lag2_grd=0 and lag2year=year then
p_ret=mean_exret-lag2_meanf;
if gr_decile=2; 
run;

proc sort data=asie.qf11;
by year month;
run;

proc reg data=asie.qf11;
model p_ret=;
run;

/**proc reg data=asie.qf11;
model p_ret=emkt;
run;

proc reg data=asie.qf11;
model p_ret=emkt smb hml;
run;

proc reg data=asie.qf11;
model p_ret=emkt smb hml mom;
run;

proc reg data=asie.qf11;
model p_ret=emkt smb hml rmw cma;
run;**/

proc reg data=asie.qf11;
model p_ret=emkt smb hml rmw cma mom;
run;


/***************************************************/
/** TABLE 13 
/***************************************************/

/** 1. changes in competion mention (lazy mentions)
/** 2. shared analysts
/** 3. news co-mention

/** 1. do the FM regrssion for each variable in their original (or possible) sample 
/** 2. do the FM regression with both SC and the variables


/*********************************************************************/
/** shared analyst - from the code 'Code_DoubleSort_SharedAnalyssts'
/*********************************************************************/

/** add stock returns from CRSP**/

proc sql;
  create table asie.shared_analyst6_a as
  select a.*,b.*
    from asie.shared_analyst6 as a left join asie.stock_returns as b
    on a.ticker1=b.tic
    and a.year_p=b.year 
    and a.month_p=b.month;
 run;

proc sort data=asie.shared_analyst6_a nodupkey;
by ticker1 year_p month_p;
run;

*----------------------winsorizing outliers-----------------------------------------------------;
%macro outliers();

%let vars=  l_Size Mtbq l_Past6_ret Profitability3 Investment3 l_Beta l_Idiovol;

%let cvar=%scan(&vars,1);
%let pos=1;

%do %while (&cvar ne );

%put &cvar;

proc sort data=asie.shared_analyst6_a; by year month; run;

proc means data=asie.shared_analyst6_a min p1 p99 max median mean n noprint;
by year month;
var &cvar;
output out = asie.outliers 
p1 (&cvar) = p1_&cvar
p99 (&cvar) = p99_&cvar;
run;

proc sql;
create table asie.temp as select a.*, b.p1_&cvar, b.p99_&cvar
from asie.shared_analyst6_a as a left join asie.outliers as b
on a.year=b.year and a.month=b.month;
quit;

data asie.shared_analyst6_a; set asie.temp;
if &cvar<p1_&cvar and &cvar ne . and p1_&cvar ne . then &cvar=p1_&cvar;
if &cvar>p99_&cvar and &cvar ne . and p99_&cvar ne . then &cvar=p99_&cvar;
drop p1_&cvar p99_&cvar;
run;

%let pos=%eval(&pos+1);
%let cvar=%scan(&vars,&pos);

%end;

%mend outliers;


%outliers;

proc sort data=asie.shared_analyst6_a; by permno year month; run;

data asie.shared_analyst6_b; set asie.shared_analyst6_a; run;


data asie.shared_analyst6_c;
set asie.shared_analyst6_b;
ret100=ret*100;
log_size=log(l_size);
log_mtb=log(Mtbq);
keep ticker1 year month ret100 sa_lret prc l_size log_mtb log_size log_mtb l_Past6_ret Profitability3 Investment3 l_Beta l_Idiovol;
run;

/** remove microcap stocks **/

proc sql;
  create table asie.shared_analyst6_d as
  select a.*,b.*
    from asie.shared_analyst6_c as a left join asie.size_breakpoints_new as b 
    on a.year=b.year 
    and a.month=b.month;
 run;

data asie.shared_analyst6_e;
set asie.shared_analyst6_d;
/**if l_size/1000 > p20;**/
if abs(prc) ge 5;
run;

/** add the simple count from TABLE 1 **/

proc sql;
  create table asie.shared_analyst6_f as
  select a.*,b.*
    from asie.shared_analyst6_e as a left join asie.simple_count7i as b
    on a.ticker1=b.mentionedticker
    and a.year=b.year 
    and a.month=b.month;
 run;

data asie.shared_analyst6_g;
set asie.shared_analyst6_f;
if l3simple_count = 0 then do;
no_mention=1;
end;
else do;
no_mention=0;
end;
l_sc=log(1+l3simple_count); 
sc_no_mention=l_sc*no_mention;
size_no_mention=log_size*no_mention;
run; 

/** NOTE: the combined regressioin with simple count and shared analysts, to do in the code 'Code_DoubleSort_SharedAnalysts' **/

proc sort data=asie.shared_analyst6_g;
by year month;
run;

ods listing close;
ods output parameterestimates=pe fitstatistics=adjrsq;
proc reg data=asie.shared_analyst6_g;
 by year month;
 model ret100=sa_lret log_size log_mtb l_Past6_ret ;
run;
quit;
ods listing;

proc means data=pe mean std t probt;
 var estimate; class variable;
run;

/** Newey-West correction **/

proc sort data=pe; by variable; run;

%let lags=12;
ods output parameterestimates=nw;
ods listing close;
proc model data=pe;
 by variable;
 instruments / intonly;
 estimate=a;
 fit estimate / gmm kernel=(bart,%eval(&lags+1),0); run;
quit;
ods listing;

proc print data=nw; id variable;
 var estimate--df; format estimate stderr 7.4;
run;

/** number of observations **/

data asie.obs1;
set asie.shared_analyst6_g;
if ret100 ne .;
if sa_lret ne .;
if log_size ne .;
if log_Mtb ne .;
if l_Past6_ret ne .;
run;

/** mean r-sqaure **/

data asie.rsq;
set adjrsq;
if label2='R-Square';
proc print;
run;

proc means data=asie.rsq noprint;
var nValue2;
output out=asie.rsq_mean mean=rsq_mean;
proc print;
run;


/*********************************************************************/
/** co-mention news - from the code 'Overlap_Scherbina_Schlusche'
/*********************************************************************/

/** get the news co-mention stock returns **/

data asie.firm_characteristics_a;
set asie.firm_characteristics;
keep tic permno year month ret;
if year ge 2010;
run;

proc sort data=asie.firm_characteristics_a;
by permno year month;
run;

data asie.firm_characteristics_b;
set asie.firm_characteristics_a;
lp=lag(permno);
lag_ret=lag(ret);
if lp=permno then do;
lret=lag_ret;
end;
run;

proc sql;
  create table asie.news_links4 as
  select a.*,b.*
    from asie.news_links3 as a left join asie.firm_characteristics_b as b
    on a.connected_firm_ticker=b.tic
    and a.year=b.year 
    and a.month=b.month;
 run;

proc sort data=asie.news_links4;
by centroid_firm_ticker year month;
run;

proc means data=asie.news_links4 noprint;
by centroid_firm_ticker year month;
var lret/weight=weight;
output out=asie.news_links5 mean=cm_lret;
proc print;
run;

/** add stock returns **/

proc sql;
  create table asie.news_links6 as
  select a.*,b.*
    from asie.news_links5 as a left join asie.stock_returns as b
    on a.centroid_firm_ticker=b.tic
    and a.year=b.year 
    and a.month=b.month;
 run;

proc sort data=asie.news_links6 nodupkey;
by centroid_firm_ticker year month;
run;

*----------------------winsorizing outliers-----------------------------------------------------;
%macro outliers();

%let vars=  l_Size Mtbq l_Past6_ret Profitability3 Investment3 l_Beta l_Idiovol;

%let cvar=%scan(&vars,1);
%let pos=1;

%do %while (&cvar ne );

%put &cvar;

proc sort data=asie.news_links6; by year month; run;

proc means data=asie.news_links6 min p1 p99 max median mean n noprint;
by year month;
var &cvar;
output out = asie.outliers 
p1 (&cvar) = p1_&cvar
p99 (&cvar) = p99_&cvar;
run;

proc sql;
create table asie.temp as select a.*, b.p1_&cvar, b.p99_&cvar
from asie.news_links6 as a left join asie.outliers as b
on a.year=b.year and a.month=b.month;
quit;

data asie.news_links6; set asie.temp;
if &cvar<p1_&cvar and &cvar ne . and p1_&cvar ne . then &cvar=p1_&cvar;
if &cvar>p99_&cvar and &cvar ne . and p99_&cvar ne . then &cvar=p99_&cvar;
drop p1_&cvar p99_&cvar;
run;

%let pos=%eval(&pos+1);
%let cvar=%scan(&vars,&pos);

%end;

%mend outliers;


%outliers;

proc sort data=asie.news_links6; by permno year month; run;

data asie.news_links7; set asie.news_links6; run;


data asie.news_links8;
set asie.news_links7;
ret100=ret*100;
log_size=log(l_size);
log_mtb=log(Mtbq);
keep centroid_firm_ticker year month ret100 cm_lret prc l_size l_mtbq log_size log_mtb l_Past6_ret Profitability3 Investment3 l_Beta l_Idiovol;
run;

/** remove microcap stocks **/

proc sql;
  create table asie.news_links9 as
  select a.*,b.*
    from asie.news_links8 as a left join asie.size_breakpoints_new as b 
    on a.year=b.year 
    and a.month=b.month;
 run;

data asie.news_links10;
set asie.news_links9;
if l_size/1000 > p20;
if abs(prc) ge 5;
run;

/** add the simple count from the code of TABLE 1 **/

proc sql;
  create table asie.news_links11 as
  select a.*,b.*
    from asie.news_links10 as a left join asie.simple_count7i as b
    on a.centroid_firm_ticker=b.mentionedticker
    and a.year=b.year 
    and a.month=b.month;
 run;

data asie.news_links12;
set asie.news_links11;
if l3simple_count = 0 then do;
no_mention=1;
end;
else do;
no_mention=0;
end;
l_sc=log(1+l3simple_count); 
sc_no_mention=l_sc*no_mention;
size_no_mention=log_size*no_mention;
run; 

proc sort data=asie.news_links12;
by year month;
run;

ods listing close;
ods output parameterestimates=pe fitstatistics=adjrsq;
proc reg data=asie.news_links12;
 by year month;
 model ret100=l_sc no_mention size_no_mention cm_lret log_size log_mtb l_Past6_ret ;
run;
quit;
ods listing;

proc means data=pe mean std t probt;
 var estimate; class variable;
run;

/** Newey-West correction **/

proc sort data=pe; by variable; run;

%let lags=12;
ods output parameterestimates=nw;
ods listing close;
proc model data=pe;
 by variable;
 instruments / intonly;
 estimate=a;
 fit estimate / gmm kernel=(bart,%eval(&lags+1),0); run;
quit;
ods listing;

proc print data=nw; id variable;
 var estimate--df; format estimate stderr 7.4;
run;

/** number of observations **/

data asie.obs1;
set asie.news_links12;
if ret100 ne .;
if cm_lret ne .;
if l_sc ne .;
if no_mention ne .;
if size_no_mention ne .; 
if log_size ne .;
if log_Mtb ne .;
if l_Past6_ret ne .;
run;

/** mean r-sqaure **/

data asie.rsq;
set adjrsq;
if label2='R-Square';
proc print;
run;

proc means data=asie.rsq noprint;
var nValue2;
output out=asie.rsq_mean mean=rsq_mean;
proc print;
run;


/*********************************************************************/
/** changes in simple count - from the code 'SimpleCountTests_Changes'
/*********************************************************************/

/** get the news comention stock returns **/

data asie.SimpleCount_Changes1;
set asie.qf6_full_d; /** this file is from the code 'SimpleCountTests_Changes' **/
run;

*----------------------winsorizing outliers-----------------------------------------------------;
%macro outliers();

%let vars=  l_Size Mtbq l_Past6_ret Profitability3 Investment3 l_Beta l_Idiovol;

%let cvar=%scan(&vars,1);
%let pos=1;

%do %while (&cvar ne );

%put &cvar;

proc sort data=asie.SimpleCount_Changes1; by year month; run;

proc means data=asie.SimpleCount_Changes1 min p1 p99 max median mean n noprint;
by year month;
var &cvar;
output out = asie.outliers 
p1 (&cvar) = p1_&cvar
p99 (&cvar) = p99_&cvar;
run;

proc sql;
create table asie.temp as select a.*, b.p1_&cvar, b.p99_&cvar
from asie.SimpleCount_Changes1 as a left join asie.outliers as b
on a.year=b.year and a.month=b.month;
quit;

data asie.SimpleCount_Changes1; set asie.temp;
if &cvar<p1_&cvar and &cvar ne . and p1_&cvar ne . then &cvar=p1_&cvar;
if &cvar>p99_&cvar and &cvar ne . and p99_&cvar ne . then &cvar=p99_&cvar;
drop p1_&cvar p99_&cvar;
run;

%let pos=%eval(&pos+1);
%let cvar=%scan(&vars,&pos);

%end;

%mend outliers;


%outliers;

proc sort data=asie.SimpleCount_Changes1; by permno year month; run;

data asie.SimpleCount_Changes2; set asie.SimpleCount_Changes1; run;


data asie.simple_count7jb;
set asie.SimpleCount_Changes2; 
/**if l3simple_count > 0;**/ /** this to exclude firms that are not mentioned at all **/
ex_ret100=ex_ret*100;
log_size=log(l_Size);
if Mtbq le 0 and Mtbq ne . then Mtbq=0.01;
if Mtbq ge 30 and Mtbq ne . then Mtbq=30;
log_Mtbq=log(Mtbq);
if l3simple_count = 0 then do;
no_mention=1;
end;
else do;
no_mention=0;
end;
run;

data asie.simple_count7jc;
set asie.simple_count7jb;
l_sc=log(1+l3simple_count); 
sc_no_mention=l_sc*no_mention;
size_no_mention=log_size*no_mention;
run;

proc sort data=asie.simple_count7jc;
by year month;
run;

ods listing close;
ods output parameterestimates=pe fitstatistics=adjrsq;
proc reg data=asie.simple_count7jc;
 by year month;
 model ex_ret100=gr_decile l_sc no_mention size_no_mention/**target sc_target**/ log_size log_Mtbq l_Past6_ret Profitability3 Investment3 l_Beta l_Idiovol;
run;
quit;
ods listing;

proc means data=pe mean std t probt;
 var estimate; class variable;
run;

/** Newey-West correction **/

proc sort data=pe; by variable; run;

%let lags=12;
ods output parameterestimates=nw;
ods listing close;
proc model data=pe;
 by variable;
 instruments / intonly;
 estimate=a;
 fit estimate / gmm kernel=(bart,%eval(&lags+1),0); run;
quit;
ods listing;

proc print data=nw; id variable;
 var estimate--df; format estimate stderr 7.4;
run;

/** number of observations **/

data asie.obs1;
set asie.simple_count7jc;
if ex_ret100 ne .;
if gr_decile ne .;
if l_sc ne .;
if no_mention ne .;
if size_no_mention ne .; 
if log_size ne .;
if log_Mtbq ne .;
if l_Past6_ret ne .;
if Profitability3 ne .;
if Investment3 ne .;
if l_Beta ne .;
if l_Idiovol;
run;

/** mean r-sqaure **/

data asie.rsq;
set adjrsq;
if label2='R-Square';
proc print;
run;

proc means data=asie.rsq noprint;
var nValue2;
output out=asie.rsq_mean mean=rsq_mean;
proc print;
run;


/***************************************************/
/** TABLE 14 
/***************************************************/

/*********************/
/** Subsampls 
/*********************/

/*******************************************************************/
/** Single sort
/*******************************************************************/

proc univariate data=asie.simple_count7i;
var l3simple_count l3simple_mean_size l3simple_sum_size;
run;

proc freq data=asie.simple_count7i;
tables l3simple_count;
run;

data asie.simple_count7j;
set asie.simple_count7i;
alt_var=l3simple_count; 
if alt_var ne .;
/**if month ne 1;
if year le 2006;
if year ge 2007;**/
run;

/** sorting mentions into 0,1,2,and 3+ groups **/

data asie.qf6_full_no_mention;
set asie.simple_count7j;
if l3simple_count=0;
gr_decile=-1;
run;

data asie.qf6_full_mention;
set asie.simple_count7j;
if l3simple_count>0;
if l3simple_mean_size ne .;
if l_size ne .;
run;

proc freq data=asie.qf6_full_mention;
tables l3simple_count;
run;

data asie.qf6_full_mention_a;
set asie.qf6_full_mention;
if l3simple_count=1 then do;
gr_decile=0;
end;
else do;
if l3simple_count=2 then do;
gr_decile=1;
end;
else do;
gr_decile=2;
end;
end;
run;

proc freq data=asie.qf6_full_mention_a;
tables gr_decile;
run;

/**proc sort data=asie.qf6_full_mention;
by year month alt_var;
run;

proc rank groups=3 data=asie.qf6_full_mention out=asie.qf6_full_mention_a;
var alt_var; 
ranks gr_decile;
by year month;
run;**/

data asie.qf6_full_d;
set asie.qf6_full_no_mention asie.qf6_full_mention_a;
run;

/** Calculating average excess returns **/

data asie.qf7a;
set asie.qf6_full_d;
/**if l3simple_count > 0;**/ /** use this condition for only mentioned firms **/
run;

proc sort data=asie.qf7a;
by year month gr_decile;
run;

proc means data=asie.qf7a noprint;
by year month gr_decile;
var ex_ret/weight=l_size_x; /** change ex_ret to ex_ret_size for size-decile adjusted return **/
output out=asie.qf8 mean=mean_exret;
run;

/** Adding the factors **/

data asie.qf8a;
set asie.qf8;
if _freq_ ge 5;
run;

proc sql;
  create table asie.qf9 as
  select a.*,b.*
    from asie.qf8a as a, asie.factorsmonthly2017 as b
    where a.year=b.year
    and a.month=b.month;
 run;

data asie.qf9a;
set asie.qf9;
if year ge 1994;
run;

proc sort data=asie.qf9a;
by year month gr_decile;
run;

data asie.qf9b;
set asie.qf9a;
by year month gr_decile;
if first.month then do x=0;
end;
x+1;
run;

proc sort data=asie.qf9b;
by descending year descending month descending x;
run;

data asie.qf9c;
set asie.qf9b;
by descending year descending month descending x;
if first.month then do y=x;
end;
y+0;
run;

proc sort data=asie.qf9c;
by year month gr_decile;
run;

data asie.qf10;
set asie.qf9c;
if y=4;
run;

proc sort data=asie.qf10; 
by gr_decile year month;
run;

proc means data=asie.qf10 noprint; /** this is the average portfolio size **/
by gr_decile;
var _freq_; 
output out=asie.qf10_port mean=firm_port;
proc print;
run;

/** add the NBER recession dummy **/

proc sql;
  create table asie.subsample1 as
  select a.*,b.*
    from asie.qf10 as a left join asie.nber1 as b
    on a.year=b.yearv
    and a.month=b.monthv;
 run;

data asie.subsample2;
set asie.subsample1;
if recession=. then recession=0;
proc print;
run;

data asie.qf10;
set asie.subsample2;
if month ne 1; /** change to if recession = 0 and to if year > 2006 **/
run;

proc sort data=asie.qf10; 
by gr_decile year month;
run;

proc reg data=asie.qf10;
by gr_decile;
model mean_exret=;
run;

/**proc reg data=asie.qf10;
by gr_decile;
model mean_exret=emkt;
run;

proc reg data=asie.qf10;
by gr_decile;
model mean_exret=emkt smb hml;
run;

proc reg data=asie.qf10;
by gr_decile;
model mean_exret=emkt smb hml mom;
run;

proc reg data=asie.qf10;
by gr_decile;
model mean_exret=emkt smb hml rmw cma;
run;**/

proc reg data=asie.qf10;
by gr_decile;
model mean_exret=emkt smb hml rmw cma mom;
run;


/** Mean return for the difference **/

proc sort data=asie.qf10;
by year month gr_decile;
run;

/** high mention vs no mention **/

data asie.qf11; /** this is for portfolio 5-0 **/
set asie.qf10;
lag3_grd=lag3(gr_decile);
lag3_meanf=lag3(mean_exret);
lag3year=lag3(year);
if gr_decile=2 and lag3_grd=-1 and lag3year=year then
p_ret=mean_exret-lag3_meanf;
if gr_decile=2; 
run;

/** high mention vs low mention **/

data asie.qf11; /** this is for portfolio 5-0 **/
set asie.qf10;
lag2_grd=lag2(gr_decile);
lag2_meanf=lag2(mean_exret);
lag2year=lag2(year);
if gr_decile=2 and lag2_grd=0 and lag2year=year then
p_ret=mean_exret-lag2_meanf;
if gr_decile=2; 
run;

proc sort data=asie.qf11;
by year month;
run;

proc reg data=asie.qf11;
model p_ret=;
run;

/**proc reg data=asie.qf11;
model p_ret=emkt;
run;

proc reg data=asie.qf11;
model p_ret=emkt smb hml;
run;

proc reg data=asie.qf11;
model p_ret=emkt smb hml mom;
run;

proc reg data=asie.qf11;
model p_ret=emkt smb hml rmw cma;
run;**/

proc reg data=asie.qf11;
model p_ret=emkt smb hml rmw cma mom;
run;


/*********************/
/** long horizons 
/*********************/

data asie.model4x;
set asie.qf6_full_d; /** this file is from the code of TABLE 3 **/
lvalue_gap=gr_decile;
if gr_decile ne .;
keep mentionedticker year month lvalue_gap ex_ret l_size_x;
run;

/**************************************************************************
/** holding period of 1 month **/

data asie.ds1;
set asie.model4x;
run;

proc sort data=asie.ds1;
by mentionedticker year month;
run;

data asie.ds2;
set asie.ds1;

   lp1=lag(mentionedticker);
         
   lagv1=lvalue_gap;
                     
   if mentionedticker=lp1 then 

   lv1=lagv1;
                    
   run;

/** portfolio 1**/

data asie.ds3;
set asie.ds2;
run;

data asie.ds4;
set asie.ds3;
lv_p=lv1;
run;

proc sort data=asie.ds4;
by year month lv_p;
run;

proc means data=asie.ds4 noprint;
by year month lv_p;
var ex_ret /weight = l_size_x;
output out=asie.p1 mean=mean1_fexret;
run;

/** One file **/

data asie.all;
merge  asie.p1;
by year month lv_p;
run;

data asie.CHS12;
set asie.all;
m_fexret=mean1_fexret;
run;

proc sort data=asie.CHS12;
by lv_p year month;
run; 

/** Adding the factors **/
data asie.CHS13;
set asie.CHS12;


if year ge 1994;
if lv_p ne .;
run;

proc sql;
  create table asie.CHS14 as
  select a.*,b.*
    from asie.CHS13 as a, asie.factorsmonthly2017 as b
    where a.year=b.year 
    and a.month=b.month;
 run;

data asie.CHS15;
set asie.CHS14;
keep year month lv_p m_fexret emkt smb hml rmw cma mom;
run;

proc sort data=asie.CHS15;  
by lv_p year month;
run;

/** Mean returns of the 6 portolios **/

/**proc reg data=asie.CHS15;
by lv_p;
model m_fexret=;
run;

proc reg data=asie.CHS15;
by lv_p;
model m_fexret=emkt;
run;

proc reg data=asie.CHS15;
by lv_p;
model m_fexret=emkt smb hml;
run;

proc reg data=asie.CHS15;
by lv_p;
model m_fexret=emkt smb hml mom;
run;

proc reg data=asie.CHS15;
by lv_p;
model m_fexret=emkt smb hml rmw cma;
run;**/

proc reg data=asie.CHS15;
by lv_p;
model m_fexret=emkt smb hml rmw cma mom;
run;


/** Mean return for the difference between the first and 3rd relative value portfolios **/

proc sort data=asie.chs15;
by year month lv_p;
run;

data asie.chs16;
set asie.chs15;
lag3_lv_p=lag3(lv_p);
lag3_meanf=lag3(m_fexret);
lag3_year=lag3(year);
lag3_month=lag3(month);
if lv_p=2 and lag3_lv_p=-1 and lag3_year=year and lag3_month=month then
p_ret=m_fexret-lag3_meanf;
if lv_p=2; 
run;

proc sort data=asie.chs16;
by year month;
run;

/**proc reg data=asie.CHS16;
model p_ret=;
run;

proc reg data=asie.CHS16;
model p_ret=emkt;
run;

proc reg data=asie.CHS16;
model p_ret=emkt smb hml;
run;

proc reg data=asie.CHS16;
model p_ret=emkt smb hml mom;
run;

proc reg data=asie.CHS16;
model p_ret=emkt smb hml rmw cma;
run;**/

proc reg data=asie.CHS16;
model p_ret=emkt smb hml rmw cma mom;
run;

data asie.chs16;
set asie.chs15;
lag2_lv_p=lag2(lv_p);
lag2_meanf=lag2(m_fexret);
lag2_year=lag2(year);
lag2_month=lag2(month);
if lv_p=2 and lag2_lv_p=0 and lag2_year=year and lag2_month=month then
p_ret=m_fexret-lag2_meanf;
if lv_p=2; 
run;

proc sort data=asie.chs16;
by year month;
run;

/**proc reg data=asie.CHS16;
model p_ret=;
run;

proc reg data=asie.CHS16;
model p_ret=emkt;
run;

proc reg data=asie.CHS16;
model p_ret=emkt smb hml;
run;

proc reg data=asie.CHS16;
model p_ret=emkt smb hml mom;
run;

proc reg data=asie.CHS16;
model p_ret=emkt smb hml rmw cma;
run;**/

proc reg data=asie.CHS16;
model p_ret=emkt smb hml rmw cma mom;
run;



/**************************************************************************
/** holding period of 3 months **/

data asie.ds1;
set asie.model4x;
run;

proc sort data=asie.ds1;
by mentionedticker year month;
run;

data asie.ds2;
set asie.ds1;

   lp3=lag3(mentionedticker);

   lagv1=lvalue_gap;
   lagv2=lag(lvalue_gap);
   lagv3=lag2(lvalue_gap);
                   
   if mentionedticker=lp3 then 

   lv1=lagv1;
   lv2=lagv2;
   lv3=lagv3;
                  
   run;

/** portfolio 3**/

data asie.ds3;
set asie.ds2;
run;

data asie.ds4;
set asie.ds3;
lv_p=lv3;
run;

proc sort data=asie.ds4;
by year month lv_p;
run;

proc means data=asie.ds4 noprint;
by year month lv_p;
var ex_ret /weight = l_size_x;
output out=asie.p3 mean=mean3_fexret;
run;

/** portfolio 2**/

data asie.ds3;
set asie.ds2;
run;

data asie.ds4;
set asie.ds3;
lv_p=lv2;
run;

proc sort data=asie.ds4;
by year month lv_p;
run;

proc means data=asie.ds4 noprint;
by year month lv_p;
var ex_ret /weight = l_size_x;
output out=asie.p2 mean=mean2_fexret;
run;

/** portfolio 1**/

data asie.ds3;
set asie.ds2;
run;

data asie.ds4;
set asie.ds3;
lv_p=lv1;
run;

proc sort data=asie.ds4;
by year month lv_p;
run;

proc means data=asie.ds4 noprint;
by year month lv_p;
var ex_ret /weight = l_size_x;
output out=asie.p1 mean=mean1_fexret;
run;

/** Merge the 3 files **/

data asie.all;
merge  asie.p3 asie.p2 asie.p1;
by year month lv_p;
run;

data asie.CHS12;
set asie.all;
m_fexret=(mean1_fexret + mean2_fexret + mean3_fexret)/3;
run;

proc sort data=asie.CHS12;
by lv_p year month;
run; 

/** Adding the factors **/
data asie.CHS13;
set asie.CHS12;


if year ge 1994;
if lv_p ne .;
run;

proc sql;
  create table asie.CHS14 as
  select a.*,b.*
    from asie.CHS13 as a, asie.factorsmonthly2017 as b
    where a.year=b.year 
    and a.month=b.month;
 run;

data asie.CHS15;
set asie.CHS14;
keep year month lv_p m_fexret emkt smb hml rmw cma mom;
run;

proc sort data=asie.CHS15;  
by lv_p year month;
run;

/** Mean returns of the 6 portolios **/

/**proc reg data=asie.CHS15;
by lv_p;
model m_fexret=;
run;

proc reg data=asie.CHS15;
by lv_p;
model m_fexret=emkt;
run;

proc reg data=asie.CHS15;
by lv_p;
model m_fexret=emkt smb hml;
run;

proc reg data=asie.CHS15;
by lv_p;
model m_fexret=emkt smb hml mom;
run;

proc reg data=asie.CHS15;
by lv_p;
model m_fexret=emkt smb hml rmw cma;
run;**/

proc reg data=asie.CHS15;
by lv_p;
model m_fexret=emkt smb hml rmw cma mom;
run;


/** Mean return for the difference between the first and 3rd relative value portfolios **/

proc sort data=asie.chs15;
by year month lv_p;
run;

data asie.chs16;
set asie.chs15;
lag3_lv_p=lag3(lv_p);
lag3_meanf=lag3(m_fexret);
lag3_year=lag3(year);
lag3_month=lag3(month);
if lv_p=2 and lag3_lv_p=-1 and lag3_year=year and lag3_month=month then
p_ret=m_fexret-lag3_meanf;
if lv_p=2; 
run;

proc sort data=asie.chs16;
by year month;
run;

/**proc reg data=asie.CHS16;
model p_ret=;
run;

proc reg data=asie.CHS16;
model p_ret=emkt;
run;

proc reg data=asie.CHS16;
model p_ret=emkt smb hml;
run;

proc reg data=asie.CHS16;
model p_ret=emkt smb hml mom;
run;

proc reg data=asie.CHS16;
model p_ret=emkt smb hml rmw cma;
run;**/

proc reg data=asie.CHS16;
model p_ret=emkt smb hml rmw cma mom;
run;

data asie.chs16;
set asie.chs15;
lag2_lv_p=lag2(lv_p);
lag2_meanf=lag2(m_fexret);
lag2_year=lag2(year);
lag2_month=lag2(month);
if lv_p=2 and lag2_lv_p=0 and lag2_year=year and lag2_month=month then
p_ret=m_fexret-lag2_meanf;
if lv_p=2; 
run;

proc sort data=asie.chs16;
by year month;
run;

/**proc reg data=asie.CHS16;
model p_ret=;
run;

proc reg data=asie.CHS16;
model p_ret=emkt;
run;

proc reg data=asie.CHS16;
model p_ret=emkt smb hml;
run;

proc reg data=asie.CHS16;
model p_ret=emkt smb hml mom;
run;

proc reg data=asie.CHS16;
model p_ret=emkt smb hml rmw cma;
run;**/

proc reg data=asie.CHS16;
model p_ret=emkt smb hml rmw cma mom;
run;



/**************************************************************************
/** holding period of 6 months **/

data asie.ds1;
set asie.model4x;
run;

proc sort data=asie.ds1;
by mentionedticker year month;
run;

data asie.ds2;
set asie.ds1;

   lp6=lag6(mentionedticker);

   lagv1=lvalue_gap;
   lagv2=lag(lvalue_gap);
   lagv3=lag2(lvalue_gap);
   lagv4=lag3(lvalue_gap);
   lagv5=lag4(lvalue_gap); 
   lagv6=lag5(lvalue_gap);
                
   if mentionedticker=lp6 then 

   lv1=lagv1;
   lv2=lagv2;
   lv3=lagv3;
   lv4=lagv4;
   lv5=lagv5; 
   lv6=lagv6;
      
   run;


/** portfolio 6**/

data asie.ds3;
set asie.ds2;
run;

data asie.ds4;
set asie.ds3;
lv_p=lv6;
run;

proc sort data=asie.ds4;
by year month lv_p;
run;

proc means data=asie.ds4 noprint;
by year month lv_p;
var ex_ret /weight = l_size_x;
output out=asie.p6 mean=mean6_fexret;
run;

/** portfolio 5**/

data asie.ds3;
set asie.ds2;
run;

data asie.ds4;
set asie.ds3;
lv_p=lv5;
run;

proc sort data=asie.ds4;
by year month lv_p;
run;

proc means data=asie.ds4 noprint;
by year month lv_p;
var ex_ret /weight = l_size_x;
output out=asie.p5 mean=mean5_fexret;
run;

/** portfolio 4**/

data asie.ds3;
set asie.ds2;
run;

data asie.ds4;
set asie.ds3;
lv_p=lv4;
run;

proc sort data=asie.ds4;
by year month lv_p;
run;

proc means data=asie.ds4 noprint;
by year month lv_p;
var ex_ret /weight = l_size_x;
output out=asie.p4 mean=mean4_fexret;
run;

/** portfolio 3**/

data asie.ds3;
set asie.ds2;
run;

data asie.ds4;
set asie.ds3;
lv_p=lv3;
run;

proc sort data=asie.ds4;
by year month lv_p;
run;

proc means data=asie.ds4 noprint;
by year month lv_p;
var ex_ret /weight = l_size_x;
output out=asie.p3 mean=mean3_fexret;
run;

/** portfolio 2**/

data asie.ds3;
set asie.ds2;
run;

data asie.ds4;
set asie.ds3;
lv_p=lv2;
run;

proc sort data=asie.ds4;
by year month lv_p;
run;

proc means data=asie.ds4 noprint;
by year month lv_p;
var ex_ret /weight = l_size_x;
output out=asie.p2 mean=mean2_fexret;
run;

/** portfolio 1**/

data asie.ds3;
set asie.ds2;
run;

data asie.ds4;
set asie.ds3;
lv_p=lv1;
run;

proc sort data=asie.ds4;
by year month lv_p;
run;

proc means data=asie.ds4 noprint;
by year month lv_p;
var ex_ret /weight = l_size_x;
output out=asie.p1 mean=mean1_fexret;
run;

/** Merge the 6 files **/

data asie.all;
merge  asie.p6 asie.p5 asie.p4 asie.p3 asie.p2 asie.p1;
by year month lv_p;
run;

data asie.CHS12;
set asie.all;
m_fexret=(mean1_fexret + mean2_fexret + mean3_fexret + mean4_fexret + mean5_fexret + mean6_fexret)/6;
run;

proc sort data=asie.CHS12;
by lv_p year month;
run; 

/** Adding the factors **/
data asie.CHS13;
set asie.CHS12;


if year ge 1994;
if lv_p ne .;
run;

proc sql;
  create table asie.CHS14 as
  select a.*,b.*
    from asie.CHS13 as a, asie.factorsmonthly2017 as b
    where a.year=b.year 
    and a.month=b.month;
 run;

data asie.CHS15;
set asie.CHS14;
keep year month lv_p m_fexret emkt smb hml rmw cma mom;
run;

proc sort data=asie.CHS15;  
by lv_p year month;
run;

/** Mean returns of the 6 portolios **/

/**proc reg data=asie.CHS15;
by lv_p;
model m_fexret=;
run;

proc reg data=asie.CHS15;
by lv_p;
model m_fexret=emkt;
run;

proc reg data=asie.CHS15;
by lv_p;
model m_fexret=emkt smb hml;
run;

proc reg data=asie.CHS15;
by lv_p;
model m_fexret=emkt smb hml mom;
run;

proc reg data=asie.CHS15;
by lv_p;
model m_fexret=emkt smb hml rmw cma;
run;**/

proc reg data=asie.CHS15;
by lv_p;
model m_fexret=emkt smb hml rmw cma mom;
run;


/** Mean return for the difference between the first and 3rd relative value portfolios **/

proc sort data=asie.chs15;
by year month lv_p;
run;

data asie.chs16;
set asie.chs15;
lag3_lv_p=lag3(lv_p);
lag3_meanf=lag3(m_fexret);
lag3_year=lag3(year);
lag3_month=lag3(month);
if lv_p=2 and lag3_lv_p=-1 and lag3_year=year and lag3_month=month then
p_ret=m_fexret-lag3_meanf;
if lv_p=2; 
run;

proc sort data=asie.chs16;
by year month;
run;

/**proc reg data=asie.CHS16;
model p_ret=;
run;

proc reg data=asie.CHS16;
model p_ret=emkt;
run;

proc reg data=asie.CHS16;
model p_ret=emkt smb hml;
run;

proc reg data=asie.CHS16;
model p_ret=emkt smb hml mom;
run;

proc reg data=asie.CHS16;
model p_ret=emkt smb hml rmw cma;
run;**/

proc reg data=asie.CHS16;
model p_ret=emkt smb hml rmw cma mom;
run;

data asie.chs16;
set asie.chs15;
lag2_lv_p=lag2(lv_p);
lag2_meanf=lag2(m_fexret);
lag2_year=lag2(year);
lag2_month=lag2(month);
if lv_p=2 and lag2_lv_p=0 and lag2_year=year and lag2_month=month then
p_ret=m_fexret-lag2_meanf;
if lv_p=2; 
run;

proc sort data=asie.chs16;
by year month;
run;

/**proc reg data=asie.CHS16;
model p_ret=;
run;

proc reg data=asie.CHS16;
model p_ret=emkt;
run;

proc reg data=asie.CHS16;
model p_ret=emkt smb hml;
run;

proc reg data=asie.CHS16;
model p_ret=emkt smb hml mom;
run;

proc reg data=asie.CHS16;
model p_ret=emkt smb hml rmw cma;
run;**/

proc reg data=asie.CHS16;
model p_ret=emkt smb hml rmw cma mom;
run;



/**************************************************************************
/** holding period of 12 months **/

data asie.ds1;
set asie.model4x;
run;

proc sort data=asie.ds1;
by mentionedticker year month;
run;

data asie.ds2;
set asie.ds1;

   lp12=lag12(mentionedticker);

   lagv1=lvalue_gap;
   lagv2=lag(lvalue_gap);
   lagv3=lag2(lvalue_gap);
   lagv4=lag3(lvalue_gap);
   lagv5=lag4(lvalue_gap); 
   lagv6=lag5(lvalue_gap);
   lagv7=lag6(lvalue_gap);
   lagv8=lag7(lvalue_gap);
   lagv9=lag8(lvalue_gap);
   lagv10=lag9(lvalue_gap);
   lagv11=lag10(lvalue_gap);
   lagv12=lag11(lvalue_gap);
             
   if mentionedticker=lp12 then 

   lv1=lagv1;
   lv2=lagv2;
   lv3=lagv3;
   lv4=lagv4;
   lv5=lagv5; 
   lv6=lagv6;
   lv7=lagv7;
   lv8=lagv8;
   lv9=lagv9;
   lv10=lagv10;
   lv11=lagv11;
   lv12=lagv12;
   
   run;

/** portfolio 12**/

data asie.ds3;
set asie.ds2;
run;

data asie.ds4;
set asie.ds3;
lv_p=lv12;
run;

proc sort data=asie.ds4;
by year month lv_p;
run;

proc means data=asie.ds4 noprint;
by year month lv_p;
var ex_ret /weight = l_size_x;
output out=asie.p12 mean=mean12_fexret;
run;

/** portfolio 11**/

data asie.ds3;
set asie.ds2;
run;

data asie.ds4;
set asie.ds3;
lv_p=lv11;
run;

proc sort data=asie.ds4;
by year month lv_p;
run;

proc means data=asie.ds4 noprint;
by year month lv_p;
var ex_ret /weight = l_size_x;
output out=asie.p11 mean=mean11_fexret;
run;

/** portfolio 10**/

data asie.ds3;
set asie.ds2;
run;

data asie.ds4;
set asie.ds3;
lv_p=lv10;
run;

proc sort data=asie.ds4;
by year month lv_p;
run;

proc means data=asie.ds4 noprint;
by year month lv_p;
var ex_ret /weight = l_size_x;
output out=asie.p10 mean=mean10_fexret;
run;

/** portfolio 9**/

data asie.ds3;
set asie.ds2;
run;

data asie.ds4;
set asie.ds3;
lv_p=lv9;
run;

proc sort data=asie.ds4;
by year month lv_p;
run;

proc means data=asie.ds4 noprint;
by year month lv_p;
var ex_ret /weight = l_size_x;
output out=asie.p9 mean=mean9_fexret;
run;

/** portfolio 8**/

data asie.ds3;
set asie.ds2;
run;

data asie.ds4;
set asie.ds3;
lv_p=lv8;
run;

proc sort data=asie.ds4;
by year month lv_p;
run;

proc means data=asie.ds4 noprint;
by year month lv_p;
var ex_ret /weight = l_size_x;
output out=asie.p8 mean=mean8_fexret;
run;

/** portfolio 7**/

data asie.ds3;
set asie.ds2;
run;

data asie.ds4;
set asie.ds3;
lv_p=lv7;
run;

proc sort data=asie.ds4;
by year month lv_p;
run;

proc means data=asie.ds4 noprint;
by year month lv_p;
var ex_ret /weight = l_size_x;
output out=asie.p7 mean=mean7_fexret;
run;

/** portfolio 6**/

data asie.ds3;
set asie.ds2;
run;

data asie.ds4;
set asie.ds3;
lv_p=lv6;
run;

proc sort data=asie.ds4;
by year month lv_p;
run;

proc means data=asie.ds4 noprint;
by year month lv_p;
var ex_ret /weight = l_size_x;
output out=asie.p6 mean=mean6_fexret;
run;

/** portfolio 5**/

data asie.ds3;
set asie.ds2;
run;

data asie.ds4;
set asie.ds3;
lv_p=lv5;
run;

proc sort data=asie.ds4;
by year month lv_p;
run;

proc means data=asie.ds4 noprint;
by year month lv_p;
var ex_ret /weight = l_size_x;
output out=asie.p5 mean=mean5_fexret;
run;

/** portfolio 4**/

data asie.ds3;
set asie.ds2;
run;

data asie.ds4;
set asie.ds3;
lv_p=lv4;
run;

proc sort data=asie.ds4;
by year month lv_p;
run;

proc means data=asie.ds4 noprint;
by year month lv_p;
var ex_ret /weight = l_size_x;
output out=asie.p4 mean=mean4_fexret;
run;

/** portfolio 3**/

data asie.ds3;
set asie.ds2;
run;

data asie.ds4;
set asie.ds3;
lv_p=lv3;
run;

proc sort data=asie.ds4;
by year month lv_p;
run;

proc means data=asie.ds4 noprint;
by year month lv_p;
var ex_ret /weight = l_size_x;
output out=asie.p3 mean=mean3_fexret;
run;

/** portfolio 2**/

data asie.ds3;
set asie.ds2;
run;

data asie.ds4;
set asie.ds3;
lv_p=lv2;
run;

proc sort data=asie.ds4;
by year month lv_p;
run;

proc means data=asie.ds4 noprint;
by year month lv_p;
var ex_ret /weight = l_size_x;
output out=asie.p2 mean=mean2_fexret;
run;

/** portfolio 1**/

data asie.ds3;
set asie.ds2;
run;

data asie.ds4;
set asie.ds3;
lv_p=lv1;
run;

proc sort data=asie.ds4;
by year month lv_p;
run;

proc means data=asie.ds4 noprint;
by year month lv_p;
var ex_ret /weight = l_size_x;
output out=asie.p1 mean=mean1_fexret;
run;

/** Merge the 12 files **/

data asie.all;
merge asie.p12 asie.p11 asie.p10 asie.p9 asie.p8 asie.p7
      asie.p6 asie.p5 asie.p4 asie.p3 asie.p2 asie.p1;
by year month lv_p;
run;

data asie.CHS12;
set asie.all;
m_fexret=(mean1_fexret + mean2_fexret + mean3_fexret + mean4_fexret + mean5_fexret + mean6_fexret
          + mean7_fexret + mean8_fexret + mean9_fexret + mean10_fexret + mean11_fexret + mean12_fexret)/12;
run;

proc sort data=asie.CHS12;
by lv_p year month;
run; 

/** Adding the factors **/
data asie.CHS13;
set asie.CHS12;


if year ge 1994;
if lv_p ne .;
run;

proc sql;
  create table asie.CHS14 as
  select a.*,b.*
    from asie.CHS13 as a, asie.factorsmonthly2017 as b
    where a.year=b.year 
    and a.month=b.month;
 run;

data asie.CHS15;
set asie.CHS14;
keep year month lv_p m_fexret emkt smb hml rmw cma mom;
run;

proc sort data=asie.CHS15;  
by lv_p year month;
run;

/** Mean returns of the 12 portolios **/

/**proc reg data=asie.CHS15;
by lv_p;
model m_fexret=;
run;

proc reg data=asie.CHS15;
by lv_p;
model m_fexret=emkt;
run;

proc reg data=asie.CHS15;
by lv_p;
model m_fexret=emkt smb hml;
run;

proc reg data=asie.CHS15;
by lv_p;
model m_fexret=emkt smb hml mom;
run;

proc reg data=asie.CHS15;
by lv_p;
model m_fexret=emkt smb hml rmw cma;
run;**/

proc reg data=asie.CHS15;
by lv_p;
model m_fexret=emkt smb hml rmw cma mom;
run;



/** Mean return for the difference between the first and 3rd relative value portfolios **/

proc sort data=asie.chs15;
by year month lv_p;
run;

data asie.chs16;
set asie.chs15;
lag3_lv_p=lag3(lv_p);
lag3_meanf=lag3(m_fexret);
lag3_year=lag3(year);
lag3_month=lag3(month);
if lv_p=2 and lag3_lv_p=-1 and lag3_year=year and lag3_month=month then
p_ret=m_fexret-lag3_meanf;
if lv_p=2; 
run;

proc sort data=asie.chs16;
by year month;
run;

/**proc reg data=asie.CHS16;
model p_ret=;
run;

proc reg data=asie.CHS16;
model p_ret=emkt;
run;

proc reg data=asie.CHS16;
model p_ret=emkt smb hml;
run;

proc reg data=asie.CHS16;
model p_ret=emkt smb hml mom;
run;

proc reg data=asie.CHS16;
model p_ret=emkt smb hml rmw cma;
run;**/

proc reg data=asie.CHS16;
model p_ret=emkt smb hml rmw cma mom;
run;

data asie.chs16;
set asie.chs15;
lag2_lv_p=lag2(lv_p);
lag2_meanf=lag2(m_fexret);
lag2_year=lag2(year);
lag2_month=lag2(month);
if lv_p=2 and lag2_lv_p=0 and lag2_year=year and lag2_month=month then
p_ret=m_fexret-lag2_meanf;
if lv_p=2; 
run;

proc sort data=asie.chs16;
by year month;
run;

/**proc reg data=asie.CHS16;
model p_ret=;
run;

proc reg data=asie.CHS16;
model p_ret=emkt;
run;

proc reg data=asie.CHS16;
model p_ret=emkt smb hml;
run;

proc reg data=asie.CHS16;
model p_ret=emkt smb hml mom;
run;

proc reg data=asie.CHS16;
model p_ret=emkt smb hml rmw cma;
run;**/

proc reg data=asie.CHS16;
model p_ret=emkt smb hml rmw cma mom;
run;



/**************************************************************************
/** holding period of 18 months **/

data asie.ds1;
set asie.model4x;
run;

proc sort data=asie.ds1;
by mentionedticker year month;
run;

data asie.ds2;
set asie.ds1;

   lp18=lag18(mentionedticker);

   lagv1=lvalue_gap;
   lagv2=lag(lvalue_gap);
   lagv3=lag2(lvalue_gap);
   lagv4=lag3(lvalue_gap);
   lagv5=lag4(lvalue_gap); 
   lagv6=lag5(lvalue_gap);
   lagv7=lag6(lvalue_gap);
   lagv8=lag7(lvalue_gap);
   lagv9=lag8(lvalue_gap);
   lagv10=lag9(lvalue_gap);
   lagv11=lag10(lvalue_gap);
   lagv12=lag11(lvalue_gap);
   lagv13=lag12(lvalue_gap);
   lagv14=lag13(lvalue_gap);
   lagv15=lag14(lvalue_gap);
   lagv16=lag15(lvalue_gap);
   lagv17=lag16(lvalue_gap);
   lagv18=lag17(lvalue_gap);
           
   if mentionedticker=lp18 then 

   lv1=lagv1;
   lv2=lagv2;
   lv3=lagv3;
   lv4=lagv4;
   lv5=lagv5; 
   lv6=lagv6;
   lv7=lagv7;
   lv8=lagv8;
   lv9=lagv9;
   lv10=lagv10;
   lv11=lagv11;
   lv12=lagv12;
   lv13=lagv13;
   lv14=lagv14;
   lv15=lagv15;
   lv16=lagv16;
   lv17=lagv17;
   lv18=lagv18;

   run;

/** portfolio 18**/

data asie.ds3;
set asie.ds2;
run;

data asie.ds4;
set asie.ds3;
lv_p=lv18;
run;

proc sort data=asie.ds4;
by year month lv_p;
run;

proc means data=asie.ds4 noprint;
by year month lv_p;
var ex_ret /weight = l_size_x;
output out=asie.p18 mean=mean18_fexret;
run;

/** portfolio 17**/

data asie.ds3;
set asie.ds2;
run;

data asie.ds4;
set asie.ds3;
lv_p=lv17;
run;

proc sort data=asie.ds4;
by year month lv_p;
run;

proc means data=asie.ds4 noprint;
by year month lv_p;
var ex_ret /weight = l_size_x;
output out=asie.p17 mean=mean17_fexret;
run;

/** portfolio 16**/

data asie.ds3;
set asie.ds2;
run;

data asie.ds4;
set asie.ds3;
lv_p=lv16;
run;

proc sort data=asie.ds4;
by year month lv_p;
run;

proc means data=asie.ds4 noprint;
by year month lv_p;
var ex_ret /weight = l_size_x;
output out=asie.p16 mean=mean16_fexret;
run;

/** portfolio 15**/

data asie.ds3;
set asie.ds2;
run;

data asie.ds4;
set asie.ds3;
lv_p=lv15;
run;

proc sort data=asie.ds4;
by year month lv_p;
run;

proc means data=asie.ds4 noprint;
by year month lv_p;
var ex_ret /weight = l_size_x;
output out=asie.p15 mean=mean15_fexret;
run;

/** portfolio 14**/

data asie.ds3;
set asie.ds2;
run;

data asie.ds4;
set asie.ds3;
lv_p=lv14;
run;

proc sort data=asie.ds4;
by year month lv_p;
run;

proc means data=asie.ds4 noprint;
by year month lv_p;
var ex_ret /weight = l_size_x;
output out=asie.p14 mean=mean14_fexret;
run;

/** portfolio 13**/

data asie.ds3;
set asie.ds2;
run;

data asie.ds4;
set asie.ds3;
lv_p=lv13;
run;

proc sort data=asie.ds4;
by year month lv_p;
run;

proc means data=asie.ds4 noprint;
by year month lv_p;
var ex_ret /weight = l_size_x;
output out=asie.p13 mean=mean13_fexret;
run;

/** portfolio 12**/

data asie.ds3;
set asie.ds2;
run;

data asie.ds4;
set asie.ds3;
lv_p=lv12;
run;

proc sort data=asie.ds4;
by year month lv_p;
run;

proc means data=asie.ds4 noprint;
by year month lv_p;
var ex_ret /weight = l_size_x;
output out=asie.p12 mean=mean12_fexret;
run;

/** portfolio 11**/

data asie.ds3;
set asie.ds2;
run;

data asie.ds4;
set asie.ds3;
lv_p=lv11;
run;

proc sort data=asie.ds4;
by year month lv_p;
run;

proc means data=asie.ds4 noprint;
by year month lv_p;
var ex_ret /weight = l_size_x;
output out=asie.p11 mean=mean11_fexret;
run;

/** portfolio 10**/

data asie.ds3;
set asie.ds2;
run;

data asie.ds4;
set asie.ds3;
lv_p=lv10;
run;

proc sort data=asie.ds4;
by year month lv_p;
run;

proc means data=asie.ds4 noprint;
by year month lv_p;
var ex_ret /weight = l_size_x;
output out=asie.p10 mean=mean10_fexret;
run;

/** portfolio 9**/

data asie.ds3;
set asie.ds2;
run;

data asie.ds4;
set asie.ds3;
lv_p=lv9;
run;

proc sort data=asie.ds4;
by year month lv_p;
run;

proc means data=asie.ds4 noprint;
by year month lv_p;
var ex_ret /weight = l_size_x;
output out=asie.p9 mean=mean9_fexret;
run;

/** portfolio 8**/

data asie.ds3;
set asie.ds2;
run;

data asie.ds4;
set asie.ds3;
lv_p=lv8;
run;

proc sort data=asie.ds4;
by year month lv_p;
run;

proc means data=asie.ds4 noprint;
by year month lv_p;
var ex_ret /weight = l_size_x;
output out=asie.p8 mean=mean8_fexret;
run;

/** portfolio 7**/

data asie.ds3;
set asie.ds2;
run;

data asie.ds4;
set asie.ds3;
lv_p=lv7;
run;

proc sort data=asie.ds4;
by year month lv_p;
run;

proc means data=asie.ds4 noprint;
by year month lv_p;
var ex_ret /weight = l_size_x;
output out=asie.p7 mean=mean7_fexret;
run;

/** portfolio 6**/

data asie.ds3;
set asie.ds2;
run;

data asie.ds4;
set asie.ds3;
lv_p=lv6;
run;

proc sort data=asie.ds4;
by year month lv_p;
run;

proc means data=asie.ds4 noprint;
by year month lv_p;
var ex_ret /weight = l_size_x;
output out=asie.p6 mean=mean6_fexret;
run;

/** portfolio 5**/

data asie.ds3;
set asie.ds2;
run;

data asie.ds4;
set asie.ds3;
lv_p=lv5;
run;

proc sort data=asie.ds4;
by year month lv_p;
run;

proc means data=asie.ds4 noprint;
by year month lv_p;
var ex_ret /weight = l_size_x;
output out=asie.p5 mean=mean5_fexret;
run;

/** portfolio 4**/

data asie.ds3;
set asie.ds2;
run;

data asie.ds4;
set asie.ds3;
lv_p=lv4;
run;

proc sort data=asie.ds4;
by year month lv_p;
run;

proc means data=asie.ds4 noprint;
by year month lv_p;
var ex_ret /weight = l_size_x;
output out=asie.p4 mean=mean4_fexret;
run;

/** portfolio 3**/

data asie.ds3;
set asie.ds2;
run;

data asie.ds4;
set asie.ds3;
lv_p=lv3;
run;

proc sort data=asie.ds4;
by year month lv_p;
run;

proc means data=asie.ds4 noprint;
by year month lv_p;
var ex_ret /weight = l_size_x;
output out=asie.p3 mean=mean3_fexret;
run;

/** portfolio 2**/

data asie.ds3;
set asie.ds2;
run;

data asie.ds4;
set asie.ds3;
lv_p=lv2;
run;

proc sort data=asie.ds4;
by year month lv_p;
run;

proc means data=asie.ds4 noprint;
by year month lv_p;
var ex_ret /weight = l_size_x;
output out=asie.p2 mean=mean2_fexret;
run;

/** portfolio 1**/

data asie.ds3;
set asie.ds2;
run;

data asie.ds4;
set asie.ds3;
lv_p=lv1;
run;

proc sort data=asie.ds4;
by year month lv_p;
run;

proc means data=asie.ds4 noprint;
by year month lv_p;
var ex_ret /weight = l_size_x;
output out=asie.p1 mean=mean1_fexret;
run;

/** Merge the 18 files **/

data asie.all;
merge asie.p18 asie.p17 asie.p16 asie.p15 asie.p14 asie.p13 asie.p12 asie.p11 asie.p10 asie.p9 asie.p8 asie.p7
      asie.p6 asie.p5 asie.p4 asie.p3 asie.p2 asie.p1;
by year month lv_p;
run;

data asie.CHS12;
set asie.all;
m_fexret=(mean1_fexret + mean2_fexret + mean3_fexret + mean4_fexret + mean5_fexret + mean6_fexret
          + mean7_fexret + mean8_fexret + mean9_fexret + mean10_fexret + mean11_fexret + mean12_fexret
          + mean13_fexret + mean14_fexret + mean15_fexret + mean16_fexret + mean17_fexret + mean18_fexret)/18;
run;

proc sort data=asie.CHS12;
by lv_p year month;
run; 

/** Adding the factors **/
data asie.CHS13;
set asie.CHS12;


if year ge 1994;
if lv_p ne .;
run;

proc sql;
  create table asie.CHS14 as
  select a.*,b.*
    from asie.CHS13 as a, asie.factorsmonthly2017 as b
    where a.year=b.year 
    and a.month=b.month;
 run;

data asie.CHS15;
set asie.CHS14;
keep year month lv_p m_fexret emkt smb hml rmw cma mom;
run;

proc sort data=asie.CHS15;  
by lv_p year month;
run;

/** Mean returns of the 18 portolios **/

/**proc reg data=asie.CHS15;
by lv_p;
model m_fexret=;
run;

proc reg data=asie.CHS15;
by lv_p;
model m_fexret=emkt;
run;

proc reg data=asie.CHS15;
by lv_p;
model m_fexret=emkt smb hml;
run;

proc reg data=asie.CHS15;
by lv_p;
model m_fexret=emkt smb hml mom;
run;

proc reg data=asie.CHS15;
by lv_p;
model m_fexret=emkt smb hml rmw cma;
run;**/

proc reg data=asie.CHS15;
by lv_p;
model m_fexret=emkt smb hml rmw cma mom;
run;


/** Mean return for the difference between the first and 3rd relative value portfolios **/

proc sort data=asie.chs15;
by year month lv_p;
run;

data asie.chs16;
set asie.chs15;
lag3_lv_p=lag3(lv_p);
lag3_meanf=lag3(m_fexret);
lag3_year=lag3(year);
lag3_month=lag3(month);
if lv_p=2 and lag3_lv_p=-1 and lag3_year=year and lag3_month=month then
p_ret=m_fexret-lag3_meanf;
if lv_p=2; 
run;

proc sort data=asie.chs16;
by year month;
run;

/**proc reg data=asie.CHS16;
model p_ret=;
run;

proc reg data=asie.CHS16;
model p_ret=emkt;
run;

proc reg data=asie.CHS16;
model p_ret=emkt smb hml;
run;

proc reg data=asie.CHS16;
model p_ret=emkt smb hml mom;
run;

proc reg data=asie.CHS16;
model p_ret=emkt smb hml rmw cma;
run;**/

proc reg data=asie.CHS16;
model p_ret=emkt smb hml rmw cma mom;
run;

data asie.chs16;
set asie.chs15;
lag2_lv_p=lag2(lv_p);
lag2_meanf=lag2(m_fexret);
lag2_year=lag2(year);
lag2_month=lag2(month);
if lv_p=2 and lag2_lv_p=0 and lag2_year=year and lag2_month=month then
p_ret=m_fexret-lag2_meanf;
if lv_p=2; 
run;

proc sort data=asie.chs16;
by year month;
run;

/**proc reg data=asie.CHS16;
model p_ret=;
run;

proc reg data=asie.CHS16;
model p_ret=emkt;
run;

proc reg data=asie.CHS16;
model p_ret=emkt smb hml;
run;

proc reg data=asie.CHS16;
model p_ret=emkt smb hml mom;
run;

proc reg data=asie.CHS16;
model p_ret=emkt smb hml rmw cma;
run;**/

proc reg data=asie.CHS16;
model p_ret=emkt smb hml rmw cma mom;
run;


/***************************************************/
/** TABLE 15 
/***************************************************/

proc sql;
  create table asie.crank_file_a as
  select a.*,b.*
    from asie.crank_file as a left join asie.firm_characteristics as b /** The 'crank_file' file contains the firms' crank values with 3 month lags.
	                                                                       l3pagerank, l3pagerank_inter, and l3pagerank_in are the cranks of all mentions,
	                                                                       cross-sector mentions, and within-sector mentions, respectively. 
	                                                                       l3rank_pr1, l3rank_pr1_inter, and l3rank_pr1_in equal to 0 for lowest pagerank values 
	                                                                       (no mentions), and 1,2,3 by equal sorting for the remaining pagerank values (firms with mentions) **/
    on a.ticker=b.tic 
    and a.year=b.year
	and a.month=b.month;
 run;

proc sql;
  create table asie.crank_file_aa as
  select a.*,b.*
    from asie.crank_file_a as a left join asie.size_breakpoints_new as b 
    on a.year=b.year 
    and a.month=b.month;
 run;

data asie.crank_file_b;
set asie.crank_file_aa;
if l_size ne .;
l_size_x=l_size;
if l3pagerank ne .;
if abs(prc) ge 5;
run;

proc sort data=asie.crank_file_b;
by year month l_size;
run;

data asie.crank_file_c;
set asie.crank_file_b;
if l_size/1000 le p10 then do;
r_var=0;
end;
else do;
if l_size/1000 le p20 then do;
r_var=1;
end;
else do;
if l_size/1000 le p30 then do;
r_var=2;
end;
else do;
if l_size/1000 le p40 then do;
r_var=3;
end;
else do;
if l_size/1000 le p50 then do;
r_var=4;
end;
else do;
if l_size/1000 le p60 then do;
r_var=5;
end;
else do;
if l_size/1000 le p70 then do;
r_var=6;
end;
else do;
if l_size/1000 le p80 then do;
r_var=7;
end;
else do;
if l_size/1000 le p90 then do;
r_var=8;
end;
else do;
r_var=9;
end;
end;
end;
end;
end;
end;
end;
end;
end;
run;

proc freq data=asie.crank_file_c;
tables r_var;
run;

proc univariate data=asie.crank_file_c;
var l_size;
run;

data asie.crank_file_d;
set asie.crank_file_c;
if r_var ge 2;
run;

proc univariate data=asie.crank_file_d;
var l_size;
run;


/***************************/
/** Single sort **/
/***************************/

/** sorting crank into a no-mentioned group, and equally to low, mid, and high crank groups **/

data asie.qf6_full_no_mention;
set asie.crank_file_d;
if l3rank_pr1=0;
gr_decile=-1;
run;

data asie.qf6_full_mention;
set asie.crank_file_d;
if l3rank_pr1>0;
run;

proc sort data=asie.qf6_full_mention;
by year month l3pagerank;
run;

proc rank groups=3 data=asie.qf6_full_mention out=asie.qf6_full_mention_a;
var l3pagerank; 
ranks gr_decile;
by year month /**r_var**/;
run;

data asie.qf6_full_d;
set asie.qf6_full_no_mention asie.qf6_full_mention_a;
run;

/** Calculating average excess returns **/

data asie.qf7a;
set asie.qf6_full_d;
/**if l3rank_pr1 > 0;**/ /** use this condition for only mentioned firms **/
run;

proc sort data=asie.qf7a;
by year month gr_decile;
run;

proc means data=asie.qf7a noprint;
by year month gr_decile;
var ex_ret/weight=l_size_x; 
output out=asie.qf8 mean=mean_exret;
run;

/** Adding the factors **/

data asie.qf8a;
set asie.qf8;
if _freq_ ge 5;
run;

proc sql;
  create table asie.qf9 as
  select a.*,b.*
    from asie.qf8a as a, asie.factorsmonthly2017 as b
    where a.year=b.year
    and a.month=b.month;
 run;

data asie.qf9a;
set asie.qf9;
if year ge 1994;
run;

proc sort data=asie.qf9a;
by year month gr_decile;
run;

data asie.qf9b;
set asie.qf9a;
by year month gr_decile;
if first.month then do x=0;
end;
x+1;
run;

proc sort data=asie.qf9b;
by descending year descending month descending x;
run;

data asie.qf9c;
set asie.qf9b;
by descending year descending month descending x;
if first.month then do y=x;
end;
y+0;
run;

proc sort data=asie.qf9c;
by year month gr_decile;
run;

data asie.qf10;
set asie.qf9c;
if y=4;
run;

proc sort data=asie.qf10; 
by gr_decile year month;
run;

proc means data=asie.qf10 noprint;
by gr_decile;
var _freq_; 
output out=asie.qf10_port mean=firm_port;
proc print;
run;

proc sort data=asie.qf10; 
by gr_decile year month;
run;

proc reg data=asie.qf10;
by gr_decile;
model mean_exret=;
run;

/**proc reg data=asie.qf10;
by gr_decile;
model mean_exret=emkt;
run;

proc reg data=asie.qf10;
by gr_decile;
model mean_exret=emkt smb hml;
run;

proc reg data=asie.qf10;
by gr_decile;
model mean_exret=emkt smb hml mom;
run;

proc reg data=asie.qf10;
by gr_decile;
model mean_exret=emkt smb hml rmw cma;
run;**/

proc reg data=asie.qf10;
by gr_decile;
model mean_exret=emkt smb hml rmw cma mom;
run;


/** Mean return for the difference between the 10th and the first portfolios **/

proc sort data=asie.qf10;
by year month gr_decile;
run;

/** high mention vs no mention **/

data asie.qf11; /** this is for portfolio 5-0 **/
set asie.qf10;
lag3_grd=lag3(gr_decile);
lag3_meanf=lag3(mean_exret);
lag3year=lag3(year);
if gr_decile=2 and lag3_grd=-1 and lag3year=year then
p_ret=mean_exret-lag3_meanf;
if gr_decile=2; 
run;

/** high mention vs low mention **/

data asie.qf11; /** this is for portfolio 5-0 **/
set asie.qf10;
lag2_grd=lag2(gr_decile);
lag2_meanf=lag2(mean_exret);
lag2year=lag2(year);
if gr_decile=2 and lag2_grd=0 and lag2year=year then
p_ret=mean_exret-lag2_meanf;
if gr_decile=2; 
run;

proc sort data=asie.qf11;
by year month;
run;

proc reg data=asie.qf11;
model p_ret=;
run;

/**proc reg data=asie.qf11;
model p_ret=emkt;
run;

proc reg data=asie.qf11;
model p_ret=emkt smb hml;
run;

proc reg data=asie.qf11;
model p_ret=emkt smb hml mom;
run;

proc reg data=asie.qf11;
model p_ret=emkt smb hml rmw cma;
run;**/

proc reg data=asie.qf11;
model p_ret=emkt smb hml rmw cma mom;
run;


/** Time series returns and alphas **/

proc sort data=asie.qf11; /** asie.CHS16 for long horizon **/
by year month;
run;

data asie.pr1;
set asie.qf11; /** asie.CHS16 for long horizon **/
FF6_alpha=p_ret-(0.02671*emkt+0.09160*smb-0.14264*hml-0.20821*RMW-0.09088*CMA-0.10648*MOM);
keep year month emkt p_ret FF6_alpha;
proc print data=asie.pr1;
run;

data asie.pr1;
set asie.qf11; /** asie.CHS16 for long horizon **/
FF6_alpha=p_ret-(0.02627*emkt-0.02233*smb-0.32008*hml-0.35764*RMW+0.06769*CMA-0.20508*MOM);
keep year month emkt p_ret FF6_alpha;
proc print data=asie.pr1;
run;

ods excel file='/sas files/aaa.xlsx';
proc print data=asie.pr1;
run;
ods excel close;


**********************************************************************************************************
* Double sorted portfolios - work on the files form the code 'Code_monthly_full_industry'
**********************************************************************************************************;

data asie.qf5;
set asie.crank_file_c; 
if abs(prc) ge 5;
if r_var ge 2;
an_var=l_size_x; /** change l_Size Mtbq l_Past6_ret Profitability3 Investment3 l_Beta l_Idiovol**/
if an_var ne .;
keep year month ex_ret an_var l3pagerank l3rank_pr1 l_size_x;
run;

proc sort data=asie.qf5; 
by year month an_var; 
run;

proc rank groups=3 data=asie.qf5 out=asie.qf6;
var an_var; 
ranks r_var;
by year month;
run;

data asie.qf6_full_no_mention;
set asie.qf6;
if l3rank_pr1=0;
gr_decile=-1;
run;

data asie.qf6_full_mention;
set asie.qf6;
if l3rank_pr1>0;
run;

proc sort data=asie.qf6_full_mention;
by year month r_var l3pagerank;
run;

proc rank groups=3 data=asie.qf6_full_mention out=asie.qf6_full_mention_a;
var l3pagerank; 
ranks gr_decile;
by year month r_var;
run;

data asie.qf6_full_d;
set asie.qf6_full_no_mention asie.qf6_full_mention_a;
run;

/** Calculating average excess returns **/

proc sort data=asie.qf6_full_d;
by year month r_var gr_decile;
run;

proc means data=asie.qf6_full_d noprint;
by year month r_var gr_decile;
var ex_ret/weight=l_size_x; 
output out=asie.qf8 mean=mean_exret;
run;

/** Adding the factors **/

data asie.qf8a;
set asie.qf8;
if _freq_ ge 5;
run;

proc sql;
  create table asie.qf9 as
  select a.*,b.*
    from asie.qf8a as a, asie.factorsmonthly2017 as b
    where a.year=b.year
    and a.month=b.month;
 run;

data asie.qf9a;
set asie.qf9;
if year ge 1994;
run;

proc sort data=asie.qf9a;
by year month r_var gr_decile;
run;

data asie.qf9b;
set asie.qf9a;
by year month r_var gr_decile;
if first.month then do x=0;
end;
x+1;
run;

proc sort data=asie.qf9b;
by descending year descending month descending x;
run;

data asie.qf9c;
set asie.qf9b;
by descending year descending month descending x;
if first.month then do y=x;
end;
y+0;
run;

proc sort data=asie.qf9c;
by year month gr_decile;
run;

data asie.qf10;
set asie.qf9c;
if y=12;
run;

proc sort data=asie.qf10; 
by gr_decile year month;
run;

proc means data=asie.qf10 noprint;
by gr_decile;
var _freq_; 
output out=asie.qf10_port mean=firm_port;
proc print;
run;

proc sort data=asie.qf10; 
by r_var gr_decile year month;
run;

/** mean of the return across the an_var quintiles **/

proc sort data=asie.qf10;
by year month gr_decile;
run;

proc means data=asie.qf10 noprint;
by year month gr_decile;
var year month mean_exret emkt smb hml rmw cma mom;
output out=asie.qf10a mean=year month mean_exret emkt smb hml rmw cma mom;
run;

proc sort data=asie.qf10; 
by gr_decile year month;
run;

proc reg data=asie.qf10;
by gr_decile;
model mean_exret=;
run;

proc reg data=asie.qf10;
by gr_decile;
model mean_exret=emkt smb hml rmw cma mom;
run;

/** Mean return for the difference between the 10th and the first portfolios **/

proc sort data=asie.qf10a;
by year month gr_decile;
run;

/** high mention vs no mention **/

data asie.qf11; 
set asie.qf10a;
lag3_grd=lag3(gr_decile);
lag3_meanf=lag3(mean_exret);
lag3year=lag3(year);
if gr_decile=2 and lag3_grd=-1 and lag3year=year then
p_ret=mean_exret-lag3_meanf;
if gr_decile=2; 
run;

/** high mention vs low mention **/

data asie.qf11; 
set asie.qf10a;
lag2_grd=lag2(gr_decile);
lag2_meanf=lag2(mean_exret);
lag2year=lag2(year);
if gr_decile=2 and lag2_grd=0 and lag2year=year then
p_ret=mean_exret-lag2_meanf;
if gr_decile=2; 
run;

proc sort data=asie.qf11;
by year month;
run;

proc reg data=asie.qf11;
model p_ret=;
run;
quit;

proc reg data=asie.qf11;
model p_ret=emkt smb hml rmw cma mom;
run;
quit;




